import gradio as gr
import torch

from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
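
# Load the T5-based paraphraser and its tokenizer from the Hugging Face Hub.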
tokenizer = AutoTokenizer.from_pretrained("humarin/chatgpt_paraphraser_on_T5_base")
model = AutoModelForSeq2SeqLM.from_pretrained("humarin/chatgpt_paraphraser_on_T5_base")

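
# Generate several paraphrases of one input with diverse beam search.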
def paraphrase(
    question,
    num_beams=5,
    num_beam_groups=5,
    num_return_sequences=5,
    repetition_penalty=10.0,
    diversity_penalty=3.0,
    no_repeat_ngram_size=2,
    temperature=0.7,
    max_length=128,
):
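    # The model was fine-tuned with the "paraphrase: " task prefix, so prepend
    # it to the input before tokenizing.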
    input_ids = tokenizer(
        f"paraphrase: {question}",
        return_tensors="pt",
        padding="longest",
        max_length=max_length,
        truncation=True,
    ).input_ids

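    # Diverse beam search (num_beam_groups > 1 with a diversity_penalty) pushes
    # the beam groups apart so the candidates are not near-duplicates of each
    # other. inference_mode() disables gradient tracking, saving memory here.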
    with torch.inference_mode():
        outputs = model.generate(
            input_ids,
            temperature=temperature,
            repetition_penalty=repetition_penalty,
            num_return_sequences=num_return_sequences,
            no_repeat_ngram_size=no_repeat_ngram_size,
            num_beams=num_beams,
            num_beam_groups=num_beam_groups,
            max_length=max_length,
            diversity_penalty=diversity_penalty,
        )

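    # Decode every candidate and return the first three paraphrases, matching
    # the three text outputs of the Gradio interface below.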
    res = tokenizer.batch_decode(outputs, skip_special_tokens=True)
    return res[0], res[1], res[2]

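
# A simple Gradio UI: one input box, three paraphrase output boxes.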
iface = gr.Interface(
    fn=paraphrase,
    inputs=["text"],
    outputs=["text", "text", "text"],
    title="AI Paraphraser",
    description="Paste text into the input box and press 'Submit'. The paraphrases are not guaranteed to be better than the original.",
    examples=[
        ["Therefore, when we share we need to create real hype for users to want to be involved."],
        ["Ideas like this I am open to your suggestions, so we can really push through."],
        ["The main goal is for readers/users to feel the need to purchase the product."],
        ["The weather is getting more and more unpredictable these days."],
    ],
    cache_examples=True,
)

iface.launch()