Spaces:

flax-community
/

roberta-base-mr

Runtime error

App Files Files Community

roberta-base-mr / apps /mlm.py

nipunsadvilkar

Use h2 class

a0fa70b over 3 years ago

raw

history blame

2.39 kB

	import json

	import streamlit as st
	from transformers import AutoTokenizer, RobertaForMaskedLM, pipeline

	# Read the app settings (e.g. model identifiers) once, at import time.
	with open("config.json", encoding="utf8") as config_file:
	    cfg = json.load(config_file)


	@st.cache(allow_output_mutation=True, show_spinner=False)
	def _load_fill_mask_pipeline(model_name_or_path):
	    """Build the fill-mask pipeline for a checkpoint.

	    Cached on ``model_name_or_path`` only, so the tokenizer and model are
	    instantiated once per checkpoint rather than once per input sentence.
	    """
	    tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
	    model = RobertaForMaskedLM.from_pretrained(model_name_or_path)
	    return pipeline("fill-mask", model=model, tokenizer=tokenizer)


	def load_model(input_text, model_name_or_path):
	    """Fill the mask in ``input_text`` using the given checkpoint.

	    Args:
	        input_text: Sentence handed to the fill-mask pipeline.
	        model_name_or_path: Identifier passed to ``from_pretrained`` for both
	            the tokenizer and the masked-LM model.

	    Returns:
	        A ``(sentence, mask, result)`` tuple: the ``"sequence"`` and
	        ``"token_str"`` fields of the first prediction, followed by the
	        pipeline's full output.
	    """
	    # Only the pipeline construction is cached; keying the cache on the input
	    # text as well would rebuild the model for every new sentence.
	    nlp = _load_fill_mask_pipeline(model_name_or_path)
	    result = nlp(input_text)
	    # The first entry is used as the prediction shown to the user.
	    top_prediction = result[0]
	    sentence, mask = top_prediction["sequence"], top_prediction["token_str"]
	    return sentence, mask, result


	def app():
	    """Render the masked-language-model demo page.

	    Shows the introductory text, lets the user pick a predefined masked
	    sentence in the sidebar or type a custom one, and, when the button is
	    pressed, displays the filled sentence, the predicted token and the raw
	    pipeline output.
	    """
	    st.title("RoBERTa Marathi - मराठी भाषा")

	    st.markdown(
	        "This demo uses [RoBERTa for Marathi](https://huggingface.co/flax-community/roberta-base-mr) model "
	        "trained on [mC4](https://huggingface.co/datasets/mc4)."
	    )

	    st.markdown(
	        "❓Can't figure out where to get a sample text other than the predefined ones?❓\n\n"
	        "Use any custom sentence with masked word or copy any headline from this [link](https://maharashtratimes.com/entertainment/articlelist/19359255.cms), and mask a word.\n"
	        "> 📒 NOTE: Supports only single `<mask>` word"
	    )

	    masked_texts = [
	        "मोठी बातमी! उद्या दुपारी <mask> वाजता जाहीर होणार दहावीचा निकाल",
	        "जॉनी लीवर यांनी नम्रता संभेरावला दिलं <mask> गिफ्ট, अभिनेत्रीने व्यक्त केल्या भावना",
	    ]

	    # The sidebar choice only seeds the text box; the text box value is what
	    # actually gets sent to the model.
	    input_text = st.sidebar.selectbox("Select a Text", options=masked_texts)
	    masked_text = st.text_input("Please type a masked sentence to fill", input_text)

	    fill_button = st.button("Fill the Mask!")

	    if fill_button:
	        # The page advertises single-mask support only, and load_model reads
	        # the first prediction as a flat dict. Reject anything else up front
	        # instead of letting the user hit a traceback.
	        if masked_text.count("<mask>") != 1:
	            st.error("Please enter a sentence containing exactly one `<mask>` token.")
	        else:
	            with st.spinner("Filling the Mask..."):
	                filled_sentence, mask, raw_json = load_model(masked_text, cfg["models"]["RoBERTa"])

	            st.markdown(f"Filled sentence: {filled_sentence}")
	            st.markdown(f"Predicted masked token: {mask}")

	            st.write(raw_json)