import streamlit as st
from transformers import MBartForConditionalGeneration, MBart50TokenizerFast
from transformers import AutoModelForMaskedLM, AutoTokenizer

# Load the models and tokenizers.
# Note: the jmedroberta masked-LM model and tokenizer are loaded here but are not used below.
model_translation = MBartForConditionalGeneration.from_pretrained("facebook/mbart-large-50-one-to-many-mmt")
model_masked_lm = AutoModelForMaskedLM.from_pretrained("alabnii/jmedroberta-base-sentencepiece")
model_translation.eval()

tokenizer_translation = MBart50TokenizerFast.from_pretrained("facebook/mbart-large-50-one-to-many-mmt", src_lang="en_XX")
tokenizer_masked_lm = AutoTokenizer.from_pretrained("alabnii/jmedroberta-base-sentencepiece")

# Map the user's numeric choice to the mBART-50 target-language code.
LANG_CODES = {1: "hi_IN", 2: "te_IN", 3: "gu_IN", 4: "bn_IN"}

text = st.text_area('Enter the text:')

if text:
    model_inputs = tokenizer_translation(text, return_tensors="pt")
    # Integer input (value=0, step=1) so the comparison against the choices above is exact.
    lg = st.number_input("Select Language: 1.Hindi, 2.Telugu, 3.Gujarati, 4.Bengali", value=0, step=1)
    if lg in LANG_CODES:
        # Force the decoder to start with the chosen target-language token.
        generated_tokens = model_translation.generate(
            **model_inputs,
            forced_bos_token_id=tokenizer_translation.lang_code_to_id[LANG_CODES[lg]],
        )
        translation = tokenizer_translation.batch_decode(generated_tokens, skip_special_tokens=True)
        st.json(translation)
    else:
        st.write('Invalid choice!')