Spaces:

flax-community
/

roberta-base-mr

Runtime error

File size: 2,386 Bytes

e35b6a7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
97275be
e35b6a7
 
 
3523c5d
e35b6a7
97275be
 
 
 
 
 
3523c5d
a0fa70b
3523c5d
97275be
 
0bc5c62
 
3523c5d
 
0bc5c62
e35b6a7
 
2d2a075
e35b6a7
 
 
 
 
97275be
e35b6a7
2d2a075
 
97275be

import json

import streamlit as st
from transformers import AutoTokenizer, RobertaForMaskedLM, pipeline

# Application settings, parsed once at import time from config.json in the
# current working directory; app() looks up the model identifier in here.
with open("config.json", encoding="utf8") as config_file:
    cfg = json.load(config_file)


@st.cache(allow_output_mutation=True, show_spinner=False)
def _load_fill_mask_pipeline(model_name_or_path):
    """Build the fill-mask pipeline for ``model_name_or_path``.

    The cache key is the model path only, so the tokenizer and model are
    loaded once per model instead of again for every new input sentence.
    ``allow_output_mutation=True`` keeps Streamlit from hashing the returned
    pipeline object to detect mutations.
    """
    tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
    model = RobertaForMaskedLM.from_pretrained(model_name_or_path)
    return pipeline("fill-mask", model=model, tokenizer=tokenizer)


def load_model(input_text, model_name_or_path):
    """Run fill-mask on ``input_text`` and return the first prediction.

    Args:
        input_text: Sentence containing the mask token to fill.
        model_name_or_path: Model identifier or path handed to
            ``from_pretrained`` for both tokenizer and model.

    Returns:
        A ``(sentence, mask, result)`` tuple: the ``"sequence"`` and
        ``"token_str"`` fields of the first entry in the pipeline output,
        followed by the complete pipeline output.
    """
    nlp = _load_fill_mask_pipeline(model_name_or_path)
    result = nlp(input_text)
    first_prediction = result[0]
    return first_prediction["sequence"], first_prediction["token_str"], result


def app():
    """Render the Streamlit fill-mask demo page.

    Shows the introductory text, lets the user pick a sample sentence from the
    sidebar or type a custom one, and, once the button is pressed, displays
    the filled sentence, the predicted token and the raw pipeline output.
    Input that does not contain exactly one mask token is rejected with an
    error message instead of being sent to the model.
    """
    # Placeholder used by the sample sentences and by the NOTE shown to the user.
    mask_token = "<mask>"

    st.title("RoBERTa Marathi - मराठी भाषा")

    st.markdown(
        "This demo uses [RoBERTa for Marathi](https://huggingface.co/flax-community/roberta-base-mr) model "
        "trained on [mC4](https://huggingface.co/datasets/mc4)."
    )

    st.markdown(
        "❓Can't figure out where to get a sample text other than the predefined ones?❓\n\n"
        "Use any custom sentence with masked word or copy any headline from this [link](https://maharashtratimes.com/entertainment/articlelist/19359255.cms), and mask a word.\n"
        "> 📒 NOTE: Supports only single `<mask>` word"
    )

    # Trailing comma after every active entry so that re-enabling the
    # commented sample cannot silently concatenate two string literals.
    masked_texts = [
        "मोठी बातमी! उद्या दुपारी <mask> वाजता जाहीर होणार दहावीचा निकाल",
        "जॉनी लीवर यांनी नम्रता संभेरावला दिलं <mask> गिफ्ट, अभिनेत्रीने व्यक्त केल्या भावना",
        # "अध्यक्ष <mask> पवार आणि उपमुख्यमंत्री अजित पवार यांची भेट घेतली.",
    ]

    input_text = st.sidebar.selectbox("Select a Text", options=masked_texts)
    masked_text = st.text_input("Please type a masked sentence to fill", input_text)

    if not st.button("Fill the Mask!"):
        return

    # load_model indexes the pipeline output as a flat list of predictions,
    # which only holds for a single mask; reject anything else up front.
    if masked_text.count(mask_token) != 1:
        st.error(f"Please enter a sentence containing exactly one `{mask_token}` token.")
        return

    with st.spinner("Filling the Mask..."):
        filled_sentence, mask, raw_json = load_model(masked_text, cfg["models"]["RoBERTa"])

    # The closing ** must directly follow the text; with a space before it
    # Markdown does not close the bold span and prints the asterisks.
    st.markdown(f"**Filled sentence:** {filled_sentence}")
    st.markdown(f"**Predicted masked token:** {mask}")

    st.write(raw_json)