File size: 2,386 Bytes
e35b6a7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
97275be
e35b6a7
 
 
3523c5d
e35b6a7
97275be
 
 
 
 
 
3523c5d
a0fa70b
3523c5d
97275be
 
0bc5c62
 
3523c5d
 
0bc5c62
e35b6a7
 
2d2a075
e35b6a7
 
 
 
 
97275be
e35b6a7
2d2a075
 
97275be
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
import json

import streamlit as st
from transformers import AutoTokenizer, RobertaForMaskedLM, pipeline

# Parse the JSON configuration once at import time; `cfg` is read later by
# the page code to look up which model checkpoint to load.
with open("config.json", encoding="utf8") as config_file:
    cfg = json.load(config_file)


@st.cache(allow_output_mutation=True, show_spinner=False)
def _load_fill_mask_pipeline(model_name_or_path):
    """Build the fill-mask pipeline for one checkpoint.

    Cached on ``model_name_or_path`` alone so the tokenizer and model weights
    are loaded once per checkpoint rather than once per distinct input
    sentence.

    Args:
        model_name_or_path: Identifier or path passed to ``from_pretrained``.

    Returns:
        The ``fill-mask`` pipeline wrapping the loaded model and tokenizer.
    """
    tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
    model = RobertaForMaskedLM.from_pretrained(model_name_or_path)
    return pipeline("fill-mask", model=model, tokenizer=tokenizer)


@st.cache(allow_output_mutation=True, show_spinner=False)
def load_model(input_text, model_name_or_path):
    """Fill the mask in ``input_text`` using the given checkpoint.

    The result is cached per ``(input_text, model_name_or_path)`` pair. The
    expensive pipeline construction is delegated to a helper that is cached
    per checkpoint, so a new sentence only costs one inference call.

    Args:
        input_text: Sentence to complete. The first pipeline result is
            indexed as a dict, which presumably requires exactly one mask
            token in the text - verify against the pipeline documentation.
        model_name_or_path: Identifier or path of the checkpoint to use.

    Returns:
        A ``(sentence, mask, result)`` tuple: the ``"sequence"`` and
        ``"token_str"`` fields of the first prediction, followed by the
        complete pipeline output.
    """
    nlp = _load_fill_mask_pipeline(model_name_or_path)
    result = nlp(input_text)
    best = result[0]
    return best["sequence"], best["token_str"], result


def app():
    """Render the Marathi RoBERTa fill-mask demo page.

    Shows an introduction, lets the user pick a predefined masked sentence
    from the sidebar or type their own, and, when the button is pressed,
    displays the filled sentence, the predicted token and the raw pipeline
    output. Input that does not contain exactly one ``<mask>`` token is
    rejected with a warning instead of being sent to the model.
    """
    st.title("RoBERTa Marathi - मराठी भाषा")

    st.markdown(
        "This demo uses [RoBERTa for Marathi](https://huggingface.co/flax-community/roberta-base-mr) model "
        "trained on [mC4](https://huggingface.co/datasets/mc4)."
    )

    st.markdown(
        "❓Can't figure out where to get a sample text other than the predefined ones?❓\n\n"
        "Use any custom sentence with masked word or copy any headline from this [link](https://maharashtratimes.com/entertainment/articlelist/19359255.cms), and mask a word.\n"
        "> 📒 NOTE: Supports only single `<mask>` word"
    )

    # Every element ends with a comma so that re-enabling the disabled example
    # cannot silently concatenate it with the preceding string literal.
    masked_texts = [
        "मोठी बातमी! उद्या दुपारी <mask> वाजता जाहीर होणार दहावीचा निकाल",
        "जॉनी लीवर यांनी नम्रता संभेरावला दिलं <mask> गिफ्ट, अभिनेत्रीने व्यक्त केल्या भावना",
        # "अध्यक्ष <mask> पवार आणि उपमुख्यमंत्री अजित पवार यांची भेट घेतली.",
    ]

    # The sidebar choice pre-populates the text box; the user may edit it.
    input_text = st.sidebar.selectbox("Select a Text", options=masked_texts)
    masked_text = st.text_input("Please type a masked sentence to fill", input_text)

    if not st.button("Fill the Mask!"):
        return

    # Only a single mask is supported (see the note shown above); reject
    # anything else up front rather than letting the model call fail.
    if masked_text.count("<mask>") != 1:
        st.warning("Please enter a sentence containing exactly one `<mask>` token.")
        return

    with st.spinner("Filling the Mask..."):
        filled_sentence, mask, raw_json = load_model(masked_text, cfg["models"]["RoBERTa"])

    # The closing `**` must directly follow the label text (no space before
    # it), otherwise Markdown does not treat the label as bold.
    st.markdown(f"**Filled sentence:** {filled_sentence}")
    st.markdown(f"**Predicted masked token:** {mask}")

    st.write(raw_json)