Spaces:
Runtime error
Runtime error
Added different pages for MLM and Classification
Browse files- app.py +9 -25
- apps/classifier.py +35 -0
- apps/mlm.py +47 -0
- config.json +5 -1
- multiapp.py +14 -0
app.py
CHANGED
@@ -1,31 +1,15 @@
|
|
1 |
-
import json
|
2 |
-
|
3 |
import streamlit as st
|
4 |
-
from transformers import AutoTokenizer, RobertaForSequenceClassification, pipeline
|
5 |
-
|
6 |
-
|
7 |
-
with open("config.json") as f:
|
8 |
-
cfg = json.loads(f.read())
|
9 |
-
|
10 |
-
|
11 |
-
@st.cache(allow_output_mutation=True)
|
12 |
-
def load_model(input_text):
|
13 |
-
tokenizer = AutoTokenizer.from_pretrained(cfg["model_name_or_path"])
|
14 |
-
model = RobertaForSequenceClassification.from_pretrained(cfg["model_name_or_path"])
|
15 |
-
|
16 |
-
nlp = pipeline("text-classification", model=model, tokenizer=tokenizer)
|
17 |
-
result = nlp(input_text)
|
18 |
-
return result
|
19 |
|
|
|
|
|
20 |
|
21 |
-
st.title("RoBERTa Marathi")
|
22 |
|
23 |
-
|
|
|
|
|
|
|
|
|
24 |
|
25 |
-
predict_button = st.button("Predict")
|
26 |
|
27 |
-
if
|
28 |
-
|
29 |
-
# Get prediction here
|
30 |
-
result = load_model(input_text)
|
31 |
-
st.write(result)
|
|
|
|
|
|
|
1 |
import streamlit as st

from apps import classifier, mlm
from multiapp import MultiApp


def main():
    """Register the demo pages and dispatch to the one picked in the sidebar."""
    demo = MultiApp()
    demo.add_app("Fill Mask", mlm.app)
    demo.add_app("Text Classification", classifier.app)
    demo.run()


if __name__ == "__main__":
    main()
|
|
|
|
|
|
apps/classifier.py
ADDED
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import json

import streamlit as st
from transformers import AutoTokenizer, RobertaForSequenceClassification, pipeline

# Explicit utf8 keeps config parsing platform-independent and matches
# apps/mlm.py, which already opens the same file with encoding="utf8".
with open("config.json", encoding="utf8") as f:
    cfg = json.loads(f.read())


@st.cache(allow_output_mutation=True, show_spinner=False)
def _load_pipeline(model_name_or_path):
    """Build and cache a text-classification pipeline.

    Cached on the model path alone, so a new input text no longer
    re-instantiates the tokenizer and model (the original cached the
    whole prediction keyed on (text, model)).
    """
    tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
    model = RobertaForSequenceClassification.from_pretrained(model_name_or_path)
    return pipeline("text-classification", model=model, tokenizer=tokenizer)


def load_model(input_text, model_name_or_path):
    """Classify *input_text* with the given model.

    Returns the raw pipeline output: a list of {"label", "score"} dicts.
    """
    return _load_pipeline(model_name_or_path)(input_text)


def app():
    """Streamlit page: Marathi text-classification demo."""
    st.title("RoBERTa Marathi")

    classifier = st.sidebar.selectbox("Select a Model", index=0, options=["Indic NLP", "iNLTK"])

    # Model keys here must exist under cfg["models"] in config.json.
    model_name_or_path = cfg["models"][classifier]
    input_text = st.text_input("Text:")

    predict_button = st.button("Predict")

    if predict_button:
        with st.spinner("Generating prediction..."):
            # Get prediction here
            result = load_model(input_text, model_name_or_path)

        st.markdown("**Predicted label:** " + result[0]["label"])
|
apps/mlm.py
ADDED
@@ -0,0 +1,47 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import json

import streamlit as st
from transformers import AutoTokenizer, RobertaForMaskedLM, pipeline

with open("config.json", encoding="utf8") as f:
    cfg = json.loads(f.read())

# Demo sentences: each pairs the original Marathi text with a <mask>-ed variant.
sample_texts = [
    {
        "original_text": "मोठी बातमी! उद्या दुपारी १ वाजता जाहीर होणार दहावीचा निकाल",
        "masked_text": "मोठी बातमी! उद्या दुपारी <mask> वाजता जाहीर होणार दहावीचा निकाल",
    },
    {
        "original_text": "अध्यक्ष शरद पवार आणि उपमुख्यमंत्री अजित पवार यांची भेट घेतली.",
        "masked_text": "अध्यक्ष <mask> पवार आणि उपमुख्यमंत्री अजित पवार यांची भेट घेतली.",
    },
]


@st.cache(allow_output_mutation=True, show_spinner=False)
def _load_pipeline(model_name_or_path):
    """Build and cache a fill-mask pipeline, keyed on model path only
    so changing the input text does not re-instantiate the model."""
    tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
    model = RobertaForMaskedLM.from_pretrained(model_name_or_path)
    return pipeline("fill-mask", model=model, tokenizer=tokenizer)


def load_model(input_text, model_name_or_path):
    """Fill the <mask> in *input_text*.

    Returns (filled_sentence, predicted_token) for the top-scoring candidate.
    """
    result = _load_pipeline(model_name_or_path)(input_text)
    sentence, mask = result[0]["sequence"], result[0]["token_str"]
    return sentence, mask


def app():
    """Streamlit page: Marathi fill-mask demo."""
    st.title("RoBERTa Marathi")

    # Only the masked variants are shown; the originals in sample_texts are
    # kept as ground-truth reference for readers of this file.
    masked_texts = [example["masked_text"] for example in sample_texts]

    input_text = st.sidebar.selectbox("Select a Text", options=masked_texts)
    masked_text = st.text_area("Please type a masked sentence to fill", input_text)

    fill_button = st.button("Fill the Mask!")

    if fill_button:
        with st.spinner("Filling the Mask..."):
            filled_sentence, mask = load_model(masked_text, cfg["models"]["RoBERTa"])

        # Bug fix: "**text: **" (space before the closing **) does not render
        # as bold in markdown — it printed literal asterisks. "**text:**" does.
        st.markdown(f"**Filled sentence:** {filled_sentence}\n\n**Predicted masked token:** {mask}")
|
config.json
CHANGED
@@ -1,3 +1,7 @@
|
|
1 |
{
|
2 |
-
"
|
|
|
|
|
|
|
|
|
3 |
}
|
|
|
1 |
{
|
2 |
+
"models": {
|
3 |
+
"Indic NLP": "flax-community/mr-indicnlp-classifier",
|
4 |
+
"iNLTK": "flax-community/mr-inltk-classifier",
|
5 |
+
"RoBERTa": "flax-community/roberta-base-mr"
|
6 |
+
}
|
7 |
}
|
multiapp.py
ADDED
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
|
3 |
+
|
4 |
+
class MultiApp:
    """Tiny page router: registered pages appear as a sidebar radio,
    and the selected page's render function is invoked."""

    def __init__(self):
        # Each entry is {"title": str, "function": zero-arg callable}.
        self.apps = []

    def add_app(self, title, func):
        """Register *func* as a page displayed under *title*."""
        entry = {"title": title, "function": func}
        self.apps.append(entry)

    def run(self):
        """Draw the task picker and run the page the user selected."""
        st.sidebar.header("Tasks")
        chosen = st.sidebar.radio("", self.apps, format_func=lambda entry: entry["title"])
        chosen["function"]()
|