Spaces:
Runtime error
Runtime error
Added different pages for MLM and Classification
Browse files- app.py +9 -25
- apps/classifier.py +35 -0
- apps/mlm.py +47 -0
- config.json +5 -1
- multiapp.py +14 -0
app.py
CHANGED
@@ -1,31 +1,15 @@
|
|
1 |
-
import json
|
2 |
-
|
3 |
import streamlit as st
|
4 |
-
from transformers import AutoTokenizer, RobertaForSequenceClassification, pipeline
|
5 |
-
|
6 |
-
|
7 |
-
with open("config.json") as f:
|
8 |
-
cfg = json.loads(f.read())
|
9 |
-
|
10 |
-
|
11 |
-
@st.cache(allow_output_mutation=True)
|
12 |
-
def load_model(input_text):
|
13 |
-
tokenizer = AutoTokenizer.from_pretrained(cfg["model_name_or_path"])
|
14 |
-
model = RobertaForSequenceClassification.from_pretrained(cfg["model_name_or_path"])
|
15 |
-
|
16 |
-
nlp = pipeline("text-classification", model=model, tokenizer=tokenizer)
|
17 |
-
result = nlp(input_text)
|
18 |
-
return result
|
19 |
|
|
|
|
|
20 |
|
21 |
-
st.title("RoBERTa Marathi")
|
22 |
|
23 |
-
|
|
|
|
|
|
|
|
|
24 |
|
25 |
-
predict_button = st.button("Predict")
|
26 |
|
27 |
-
if
|
28 |
-
|
29 |
-
# Get prediction here
|
30 |
-
result = load_model(input_text)
|
31 |
-
st.write(result)
|
|
|
|
|
|
|
1 |
import streamlit as st

from apps import classifier, mlm
from multiapp import MultiApp


def main():
    """Register the demo pages and dispatch to the one picked in the sidebar."""
    demo = MultiApp()
    demo.add_app("Fill Mask", mlm.app)
    demo.add_app("Text Classification", classifier.app)
    demo.run()


if __name__ == "__main__":
    main()
|
|
|
|
|
|
apps/classifier.py
ADDED
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import json

import streamlit as st
from transformers import AutoTokenizer, RobertaForSequenceClassification, pipeline

# Explicit utf8 keeps config parsing platform-independent and matches
# apps/mlm.py, which already opens the same file with encoding="utf8".
with open("config.json", encoding="utf8") as f:
    cfg = json.loads(f.read())


@st.cache(allow_output_mutation=True, show_spinner=False)
def _load_pipeline(model_name_or_path):
    """Build and cache a text-classification pipeline.

    Cached on the model path alone, so a new input text no longer
    re-instantiates the tokenizer and model (the original cached the
    whole prediction keyed on (text, model)).
    """
    tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
    model = RobertaForSequenceClassification.from_pretrained(model_name_or_path)
    return pipeline("text-classification", model=model, tokenizer=tokenizer)


def load_model(input_text, model_name_or_path):
    """Classify *input_text* with the given model.

    Returns the raw pipeline output: a list of {"label", "score"} dicts.
    """
    return _load_pipeline(model_name_or_path)(input_text)


def app():
    """Streamlit page: Marathi text-classification demo."""
    st.title("RoBERTa Marathi")

    classifier = st.sidebar.selectbox("Select a Model", index=0, options=["Indic NLP", "iNLTK"])

    # Model keys here must exist under cfg["models"] in config.json.
    model_name_or_path = cfg["models"][classifier]
    input_text = st.text_input("Text:")

    predict_button = st.button("Predict")

    if predict_button:
        with st.spinner("Generating prediction..."):
            # Get prediction here
            result = load_model(input_text, model_name_or_path)

        st.markdown("**Predicted label:** " + result[0]["label"])
|
apps/mlm.py
ADDED
@@ -0,0 +1,47 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import json

import streamlit as st
from transformers import AutoTokenizer, RobertaForMaskedLM, pipeline

with open("config.json", encoding="utf8") as f:
    cfg = json.loads(f.read())

# Demo sentences: each pairs the original Marathi text with a <mask>-ed variant.
sample_texts = [
    {
        "original_text": "मोठी बातमी! उद्या दुपारी १ वाजता जाहीर होणार दहावीचा निकाल",
        "masked_text": "मोठी बातमी! उद्या दुपारी <mask> वाजता जाहीर होणार दहावीचा निकाल",
    },
    {
        "original_text": "अध्यक्ष शरद पवार आणि उपमुख्यमंत्री अजित पवार यांची भेट घेतली.",
        "masked_text": "अध्यक्ष <mask> पवार आणि उपमुख्यमंत्री अजित पवार यांची भेट घेतली.",
    },
]


@st.cache(allow_output_mutation=True, show_spinner=False)
def _load_pipeline(model_name_or_path):
    """Build and cache a fill-mask pipeline, keyed on model path only
    so changing the input text does not re-instantiate the model."""
    tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
    model = RobertaForMaskedLM.from_pretrained(model_name_or_path)
    return pipeline("fill-mask", model=model, tokenizer=tokenizer)


def load_model(input_text, model_name_or_path):
    """Fill the <mask> in *input_text*.

    Returns (filled_sentence, predicted_token) for the top-scoring candidate.
    """
    result = _load_pipeline(model_name_or_path)(input_text)
    sentence, mask = result[0]["sequence"], result[0]["token_str"]
    return sentence, mask


def app():
    """Streamlit page: Marathi fill-mask demo."""
    st.title("RoBERTa Marathi")

    # Only the masked variants are shown; the originals in sample_texts are
    # kept as ground-truth reference for readers of this file.
    masked_texts = [example["masked_text"] for example in sample_texts]

    input_text = st.sidebar.selectbox("Select a Text", options=masked_texts)
    masked_text = st.text_area("Please type a masked sentence to fill", input_text)

    fill_button = st.button("Fill the Mask!")

    if fill_button:
        with st.spinner("Filling the Mask..."):
            filled_sentence, mask = load_model(masked_text, cfg["models"]["RoBERTa"])

        # Bug fix: "**text: **" (space before the closing **) does not render
        # as bold in markdown — it printed literal asterisks. "**text:**" does.
        st.markdown(f"**Filled sentence:** {filled_sentence}\n\n**Predicted masked token:** {mask}")
|
config.json
CHANGED
@@ -1,3 +1,7 @@
|
|
1 |
{
|
2 |
-
"
|
|
|
|
|
|
|
|
|
3 |
}
|
|
|
1 |
{
|
2 |
+
"models": {
|
3 |
+
"Indic NLP": "flax-community/mr-indicnlp-classifier",
|
4 |
+
"iNLTK": "flax-community/mr-inltk-classifier",
|
5 |
+
"RoBERTa": "flax-community/roberta-base-mr"
|
6 |
+
}
|
7 |
}
|
multiapp.py
ADDED
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
|
3 |
+
|
4 |
+
class MultiApp:
    """Tiny page router: registered pages appear as a sidebar radio,
    and the selected page's render function is invoked."""

    def __init__(self):
        # Each entry is {"title": str, "function": zero-arg callable}.
        self.apps = []

    def add_app(self, title, func):
        """Register *func* as a page displayed under *title*."""
        entry = {"title": title, "function": func}
        self.apps.append(entry)

    def run(self):
        """Draw the task picker and run the page the user selected."""
        st.sidebar.header("Tasks")
        chosen = st.sidebar.radio("", self.apps, format_func=lambda entry: entry["title"])
        chosen["function"]()
|