File size: 2,147 Bytes
2b81c45
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1f4bec9
 
2b81c45
 
 
 
2fc1b77
1f4bec9
 
2b81c45
 
 
 
2fc1b77
1f4bec9
 
2b81c45
 
 
 
2fc1b77
1f4bec9
 
2b81c45
 
 
 
2fc1b77
 
2b81c45
b34faf5
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
import streamlit as st
from transformers import MBartForConditionalGeneration, MBart50TokenizerFast
from transformers import AutoModelForMaskedLM, AutoTokenizer

# Load the models and tokenizers.
# Cached with st.cache_resource so the multi-GB checkpoints are loaded once
# per server process instead of on every Streamlit rerun.
@st.cache_resource
def _load_models():
    """Load and return (translation model, masked-LM model, translation tokenizer, masked-LM tokenizer)."""
    translation_model = MBartForConditionalGeneration.from_pretrained("facebook/mbart-large-50-one-to-many-mmt")
    masked_lm_model = AutoModelForMaskedLM.from_pretrained("alabnii/jmedroberta-base-sentencepiece")
    translation_model.eval()  # inference only — disable dropout etc.

    # src_lang fixed to English; targets are chosen per-request via forced_bos_token_id.
    translation_tokenizer = MBart50TokenizerFast.from_pretrained("facebook/mbart-large-50-one-to-many-mmt", src_lang="en_XX")
    masked_lm_tokenizer = AutoTokenizer.from_pretrained("alabnii/jmedroberta-base-sentencepiece")
    return translation_model, masked_lm_model, translation_tokenizer, masked_lm_tokenizer


# Keep the original module-level names so downstream code is unaffected.
# NOTE(review): model_masked_lm / tokenizer_masked_lm are loaded but never used
# in this file — confirm they are needed before removing.
model_translation, model_masked_lm, tokenizer_translation, tokenizer_masked_lm = _load_models()

# Map the user's short language choice to the MBart-50 target-language code.
# Adding a language is now a one-line change instead of a new elif branch.
_LANG_CODES = {
    "hi": "hi_IN",  # Hindi
    "te": "te_IN",  # Telugu
    "gu": "gu_IN",  # Gujarati
    "bn": "bn_IN",  # Bengali
}

text = st.text_area('Enter the text:')

if text:
    model_inputs = tokenizer_translation(text, return_tensors="pt")
    lg = st.text_input("Select Language: hi.Hindi, te.Telugu, gu.Gujarati, bn.Bengali")
    target_code = _LANG_CODES.get(lg)
    if target_code is None:
        # Covers both an unrecognized choice and the empty initial input.
        st.write('invalid choice!')
    else:
        # forced_bos_token_id steers MBart-50 to decode in the chosen target language.
        generated_tokens = model_translation.generate(
            **model_inputs,
            forced_bos_token_id=tokenizer_translation.lang_code_to_id[target_code],
        )
        translation = tokenizer_translation.batch_decode(generated_tokens, skip_special_tokens=True)
        # Originally the 'bn' branch alone used st.json; unified on st.write
        # so every language renders the same way.
        st.write(translation)