english2darija / app.py
Essa20001's picture
Update app.py
2eec867 verified
raw
history blame contribute delete
910 Bytes
from transformers import AutoModelForSeq2SeqLM, T5Tokenizer
import streamlit as st
# Length cap handed to the tokenizer (input truncation) and to generate().
MAX_LENGTH = 184
# Marker prepended to the user's sentence before it is tokenized.
SPECIAL_WORD = "[TODARIJA]"

# Issued before the cached loader runs so that it remains the first Streamlit
# command of the script (the cache spinner would otherwise come first).
st.set_page_config("English to darija ")


@st.cache_resource
def _load_model_and_tokenizer():
    """Load the "ckpt" seq2seq model (in eval mode) and its T5 tokenizer.

    Streamlit re-executes the whole script on every widget interaction;
    caching the loaded objects as a shared resource avoids re-reading the
    checkpoint on each rerun.

    Returns:
        A ``(model, tokenizer)`` tuple.
    """
    seq2seq = AutoModelForSeq2SeqLM.from_pretrained("ckpt").eval()
    tok = T5Tokenizer.from_pretrained("ckpt")
    return seq2seq, tok


model, tokenizer = _load_model_and_tokenizer()

st.title('English to Darija Translation machine by fine-tuning T5 model on Darija Open Dataset')
sentence = st.text_input("input your english text")
button = st.button("translate to Darija")
# Runs only on the rerun triggered by pressing the "translate" button.
if button:
    if not sentence.strip():
        # Nothing to translate: do not send a bare prefix to the model.
        st.warning("Please enter some English text to translate.")
    else:
        # The input is lowercased and prefixed with the marker word before
        # tokenization.
        prompt = SPECIAL_WORD + " " + sentence.lower()
        # Coarse guard on whitespace-separated words (marker included);
        # token-level truncation is still applied by the tokenizer below.
        word_count = len(prompt.split())
        if word_count < MAX_LENGTH - 1:
            inputs = tokenizer(
                prompt,
                max_length=MAX_LENGTH,
                truncation=True,
                return_tensors="pt",
            )
            outputs = model.generate(**inputs, max_length=MAX_LENGTH)
            # A single sentence was encoded, so only the first decoded
            # sequence is displayed.
            decoded_output = tokenizer.batch_decode(
                outputs, skip_special_tokens=True
            )[0]
            st.text(decoded_output)
        else:
            # Previously an over-long input was dropped without any feedback.
            st.warning(
                f"Input is too long: please enter fewer than {MAX_LENGTH - 2} words."
            )