File size: 904 Bytes
74ac292
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
from transformers import AutoModelForSeq2SeqLM, T5Tokenizer
import streamlit as st 

# Upper bound passed as ``max_length`` to both the tokenizer and
# ``model.generate`` further down; also used for the word-count guard.
MAX_LENGTH = 184
# Task prefix prepended to the user's sentence before tokenization.
SPECIAL_WORD = "[TODARIJA]"


def _load_checkpoint(path="ckpt"):
    """Load the seq2seq model and its tokenizer from the checkpoint at *path*.

    Returns:
        A ``(model, tokenizer)`` tuple.
    """
    return (
        AutoModelForSeq2SeqLM.from_pretrained(path),
        T5Tokenizer.from_pretrained(path),
    )


# Streamlit re-executes this script on every user interaction, so an
# uncached load would re-read the checkpoint on each click. Cache the loaded
# objects when the running Streamlit version provides ``st.cache_resource``;
# otherwise keep the plain (uncached) behaviour.
# ``show_spinner=False`` keeps the cached call from emitting a UI element
# before ``st.set_page_config`` runs below.
_cache_resource = getattr(st, "cache_resource", None)
if _cache_resource is not None:
    _load_checkpoint = _cache_resource(show_spinner=False)(_load_checkpoint)

model, tokenizer = _load_checkpoint()


# Page metadata; kept as the first Streamlit call that renders anything.
st.set_page_config("English to darija ")
st.title('English to Darija  Translation machine by fine-tuning T5 model on Darija Open Dataset')

sentence = st.text_input("input your english text")

button = st.button("translate to Darija")

if button:
    if not sentence.strip():
        # Nothing to translate: tell the user instead of running the model
        # on the bare task prefix.
        st.warning("Please enter some English text to translate.")
    else:
        # Build the model prompt: task prefix + user text, lower-cased.
        # NOTE(review): lower-casing also lower-cases SPECIAL_WORD itself;
        # presumably this matches the preprocessing used at fine-tuning
        # time -- confirm against the training script.
        prompt = (SPECIAL_WORD + " " + sentence).lower()
        # Word count includes the prefix word.
        length = len(prompt.split())
        if length < MAX_LENGTH - 1:
            inputs = tokenizer(
                prompt,
                max_length=MAX_LENGTH,
                truncation=True,
                return_tensors="pt",
            )
            outputs = model.generate(**inputs, max_length=MAX_LENGTH)

            # generate() returns a batch; only one sentence was submitted.
            decoded_output = tokenizer.batch_decode(outputs, skip_special_tokens=True)[0]
            st.text(decoded_output)
        else:
            # Previously this case produced no output at all; make the
            # limit visible. The prefix counts as one word, hence "- 2".
            st.warning(
                f"Input is too long; please shorten it to fewer than "
                f"{MAX_LENGTH - 2} words."
            )