hassiahk's picture
Added model hub links
97275be
raw
history blame
1.99 kB
import json
import streamlit as st
from transformers import AutoTokenizer, RobertaForMaskedLM, pipeline
# Parse the app settings from config.json once, at import time; the rest of
# the module reads them through the module-level ``cfg`` mapping.
with open("config.json", encoding="utf8") as config_file:
    cfg = json.load(config_file)
@st.cache(allow_output_mutation=True, show_spinner=False)
def _load_pipeline(model_name_or_path):
    """Build the fill-mask pipeline for ``model_name_or_path``.

    The Streamlit cache is keyed on the model identifier only, so the
    tokenizer and model are instantiated once per model instead of once per
    distinct input sentence.
    """
    tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
    model = RobertaForMaskedLM.from_pretrained(model_name_or_path)
    return pipeline("fill-mask", model=model, tokenizer=tokenizer)


def load_model(input_text, model_name_or_path):
    """Fill the mask in ``input_text`` using the given model.

    Args:
        input_text: Sentence containing the mask token to predict.
        model_name_or_path: Identifier or path passed to ``from_pretrained``.

    Returns:
        A ``(sentence, mask, result)`` tuple: the ``"sequence"`` and
        ``"token_str"`` fields of the first prediction, followed by the full
        pipeline output.
    """
    # Only the expensive model construction is cached; inference runs on each
    # call so a new sentence never forces a model reload.
    nlp = _load_pipeline(model_name_or_path)
    result = nlp(input_text)
    # NOTE(review): indexing assumes a single mask in the input, i.e. a flat
    # list of prediction dicts - confirm behaviour for multi-mask sentences.
    top_prediction = result[0]
    return top_prediction["sequence"], top_prediction["token_str"], result
def app():
    """Render the RoBERTa Marathi fill-mask demo page.

    Shows an introduction, lets the user pick a sample sentence from the
    sidebar or type their own, and on button press displays the filled
    sentence, the predicted token and the raw pipeline output.
    """
    st.title("RoBERTa Marathi")

    st.markdown(
        "This demo uses [RoBERTa for Marathi](https://huggingface.co/flax-community/roberta-base-mr) model "
        "trained on [mC4](https://huggingface.co/datasets/mc4)."
    )

    st.markdown(
        "Can't figure out where to get a sample text? Visit this "
        "[link](https://maharashtratimes.com/entertainment/articlelist/19359255.cms), copy any headline and mask a word."
    )

    masked_texts = [
        "मोठी बातमी! उद्या दुपारी <mask> वाजता जाहीर होणार दहावीचा निकाल",
        "अध्यक्ष <mask> पवार आणि उपमुख्यमंत्री अजित पवार यांची भेट घेतली.",
    ]

    # The sidebar choice only seeds the text box; the user can still edit it.
    input_text = st.sidebar.selectbox("Select a Text", options=masked_texts)
    masked_text = st.text_input("Please type a masked sentence to fill", input_text)

    fill_button = st.button("Fill the Mask!")

    if fill_button:
        with st.spinner("Filling the Mask..."):
            filled_sentence, mask, raw_json = load_model(masked_text, cfg["models"]["RoBERTa"])

        # The closing ** must directly follow non-whitespace, otherwise the
        # label is not treated as bold and the asterisks show up literally.
        st.markdown(f"**Filled sentence:** {filled_sentence}")
        st.markdown(f"**Predicted masked token:** {mask}")

        st.write(raw_json)