File size: 1,438 Bytes
d07f696
 
c649c75
d07f696
 
 
 
 
d3fdda2
174d87f
d07f696
 
 
 
9f3a390
d07f696
 
 
9f3a390
d07f696
 
 
 
 
 
 
 
 
 
 
 
174d87f
d07f696
174d87f
d07f696
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
import streamlit as st
import torch
import sacremoses
from transformers import pipeline
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
from transformers import FSMTForConditionalGeneration, FSMTTokenizer

# Page heading followed by the explanatory paragraphs shown above the input box.
st.title("Paraphraser Three -- Back Translation")
for blurb in (
  "Paraphrase means to express meaning using different words. Back Translation refers to the method by which the computer paraphrases.",
  "Write or paste an English language sentence below, and enter. The machine will translate your sentence into another language using one language model. The machine will then translate that sentence into English using another language model.",
):
  st.write(blurb)

# Text box whose current contents are fed to the translation steps further down.
user_input = st.text_area("Input sentence.")

@st.cache_resource
def load_en2de():
  """Return the English-to-German translation pipeline backed by ``t5-base``.

  The result is cached with ``st.cache_resource`` so the pipeline is built
  once and reused, instead of being reloaded each time Streamlit re-executes
  the script after a user interaction.

  Returns:
    The ``transformers`` pipeline object for the ``translation_en_to_de`` task.
  """
  return pipeline("translation_en_to_de", model="t5-base")
  
@st.cache_resource
def load_de2en():
  """Return the tokenizer and model used for German-to-English translation.

  Both objects are loaded from the ``facebook/wmt19-de-en`` checkpoint. The
  pair is cached with ``st.cache_resource`` so the checkpoint is loaded once
  and reused, instead of being reloaded each time Streamlit re-executes the
  script after a user interaction.

  Returns:
    A ``(tokenizer, model)`` tuple: the ``FSMTTokenizer`` and the
    ``FSMTForConditionalGeneration`` instance, in that order.
  """
  model_name = "facebook/wmt19-de-en"
  tokenizer = FSMTTokenizer.from_pretrained(model_name)
  model_de_to_en = FSMTForConditionalGeneration.from_pretrained(model_name)
  return tokenizer, model_de_to_en
  
# Obtain both translation stages: English -> German, then German -> English.
en2de = load_en2de()
tokenizer_de2en, de2en = load_de2en()

# Streamlit executes this script from the top on every interaction, including
# the initial page load when the text area is still empty. Only run the models
# (and show a result) once the user has actually entered some text.
if user_input.strip():
  # Step 1: translate the English input into German.
  en_to_de_output = en2de(user_input)
  translated_text = en_to_de_output[0]['translation_text']

  # Step 2: translate the German text back into English; generate() returns a
  # batch of sequences, of which the first (and only) one is decoded.
  input_ids = tokenizer_de2en.encode(translated_text, return_tensors="pt")
  output_ids = de2en.generate(input_ids)[0]
  augmented_text = tokenizer_de2en.decode(output_ids, skip_special_tokens=True)

  st.write("Paraphrased sentence: ", augmented_text)