summarize / paraphrase.py
shamim237's picture
Update paraphrase.py
a4d296e
raw
history blame
3.84 kB
import re
import streamlit as st
import torch
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
@st.cache(allow_output_mutation=True)
def do_paraphrase(model_name):
    """Load and return the seq2seq model for ``model_name``.

    Despite its name, this function does not paraphrase anything itself; it
    only instantiates the model via ``AutoModelForSeq2SeqLM.from_pretrained``.
    The ``st.cache`` decorator lets Streamlit reuse the loaded object instead
    of loading it again on every call.

    Args:
        model_name: Identifier passed straight to ``from_pretrained``.

    Returns:
        The loaded model object.
    """
    return AutoModelForSeq2SeqLM.from_pretrained(model_name)
@st.cache(allow_output_mutation=True)
def do_tokenize(model_name):
    """Load and return the tokenizer for ``model_name``.

    This function does not tokenize text itself; it only instantiates the
    tokenizer via ``AutoTokenizer.from_pretrained``. The ``st.cache``
    decorator lets Streamlit reuse the loaded object across calls.

    Args:
        model_name: Identifier passed straight to ``from_pretrained``.

    Returns:
        The loaded tokenizer object.
    """
    return AutoTokenizer.from_pretrained(model_name)
# Single source of truth for the checkpoint used by both the model and the
# tokenizer, so the two can never drift apart.
_CHECKPOINT = "ramsrigouthamg/t5-large-paraphraser-diverse-high-quality"

model = do_paraphrase(_CHECKPOINT)
tokenizer = do_tokenize(_CHECKPOINT)

# Prefer the GPU when CUDA is available; otherwise run on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)
def para(paragraph):
    """Paraphrase the usable entries of ``paragraph`` with the loaded model.

    Entries whose word count (number of word-character runs found by
    ``re.findall``) is exactly two are dropped before paraphrasing, so the
    returned list can be shorter than the input. NOTE(review): this looks
    like a heuristic for skipping short section headings such as
    "BLUETOOTH CALLING" -- confirm with callers.

    Args:
        paragraph: Iterable of strings to paraphrase.

    Returns:
        A list of paraphrased strings, one per retained entry, in input
        order, with the "paraphrasedoutput: " prefix removed.
    """
    # Drop two-word entries; for the rest, swap double quotes for single
    # quotes and strip newlines before building the model prompt.
    sentences = [
        entry.replace('"', "'").replace("\n", "")
        for entry in paragraph
        if len(re.findall(r'\w+', entry)) != 2
    ]

    paras = []
    if sentences:
        # Inference only: switch to eval mode once rather than per sentence.
        model.eval()

    for sentence in sentences:
        text = "paraphrase: " + sentence + " </s>"
        # truncation=True is what makes max_length effective; without it the
        # tokenizer does not cap the input at 256 tokens.
        encoding = tokenizer.encode_plus(
            text,
            max_length=256,
            padding=True,
            truncation=True,
            return_tensors="pt",
        )
        input_ids = encoding["input_ids"].to(device)
        attention_mask = encoding["attention_mask"].to(device)

        beam_outputs = model.generate(
            input_ids=input_ids,
            attention_mask=attention_mask,
            max_length=256,
            early_stopping=True,
            num_beams=15,
            num_return_sequences=3,
        )

        # Three candidates are requested and the last one (index 2) is the
        # one kept, exactly as before.
        decoded = tokenizer.decode(
            beam_outputs[2],
            skip_special_tokens=True,
            clean_up_tokenization_spaces=True,
        )
        # Remove the "paraphrasedoutput: " prefix from the decoded text.
        paras.append(decoded.replace("paraphrasedoutput: ", ""))

    # Trace line preserved from the original; note that it is emitted after
    # all sentences have been processed.
    print('para start---')
    return paras
# pas = ['Product Description', 'EIGIIS military smart watch has a 10 military grade certifications and can be used under the harsh environmental conditions, such as 70 ℃ heat resistance, -40 ℃ cold resistance; 120 hours resistance to humidity, 96 hours resistance to salt spray.',
# 'EIGIIS military smart watch has a variety of exercise modes, including running, walking, cycling, skipping, basketball, football, badminton, etc. It can perform real-time heart rate monitoring and accurately record all-day activities, such as pedometer, distance, calories burned, heart rate and active time through intelligent exercise algorithms.',
# 'After connecting to Bluetooth, you can make calls on your EIGIIS smart watch K22, and you can view various notifications such as SMS and SNS (Facebook, WhatsApp, Instagram, Wechat and Twitter ) through your smart watch without turning on your phone.',
# 'BLUETOOTH CALLING', 'You can make calls and answer the calls directly by your watch. Besides, it can also supports call rejection and synchronize your recent calls.',
# 'SMART NOTIFICATIONS', 'Using message and call notification, you will never miss any important calls and messages at all!',
# 'MUSIC CONTROL', 'You can even use it to play music outside. Anytime, anywhere brings you a convenient life.',
# 'SLEEP MONITORING', 'Sleep monitoring records how many times you wake up/light sleep/deep sleep/REM sleep.',
# 'HEALTH MONITORING', 'Always caring about your physical health, you can measure your blood pressure and heart rate anytime and anywhere. And all measured data can be sync on the mobile phone. You can get a better understanding of your health by analyze the corresponding data.',
# 'WEATHER FORECAST', 'You can check the weather all over the country, providing you more convenience in your daily life.',
# 'EIGIIS military smart watch has 100+ dails for you to choose in "Da Fit" APP. You can switch the UI arbitrarily by long pressing the watch dial. Let you have a different experience.']
# pa = para(pas)
# print(pa)