File size: 3,768 Bytes
e35b6a7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3523c5d
e35b6a7
97275be
4aa97f4
bd074e8
4aa97f4
 
bd074e8
4aa97f4
3523c5d
97275be
 
35e3d4c
e35b6a7
35e3d4c
4aa97f4
 
 
 
 
 
 
 
bd074e8
35e3d4c
c5e1c4a
2d2a075
3523c5d
 
97275be
2d2a075
e35b6a7
2d2a075
97275be
 
 
 
e35b6a7
 
 
 
 
2d2a075
e35b6a7
1e8ecc2
4402f9d
 
0fedd5e
af35c1b
4aa97f4
8a5dee8
4aa97f4
 
 
 
 
 
af35c1b
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
import json

import streamlit as st
from transformers import AutoTokenizer, RobertaForSequenceClassification, pipeline

# Load the demo configuration once at import time. ``cfg["models"]`` is later
# indexed by the model name picked in the sidebar to get a model path.
# JSON is UTF-8 by specification, so decode explicitly instead of relying on
# the platform's default text encoding.
with open("config.json", encoding="utf-8") as f:
    cfg = json.load(f)


@st.cache(allow_output_mutation=True, show_spinner=False)
def _load_pipeline(model_name_or_path):
    """Build the text-classification pipeline for one model.

    The cache key is the model path only, so the tokenizer and model weights
    are loaded once per model rather than once per distinct input text.
    ``allow_output_mutation=True`` stops Streamlit from hashing the returned
    pipeline object on every cache hit.

    Args:
        model_name_or_path: Identifier or path passed to ``from_pretrained``.

    Returns:
        A ``transformers`` "text-classification" pipeline.
    """
    tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
    model = RobertaForSequenceClassification.from_pretrained(model_name_or_path)
    return pipeline("text-classification", model=model, tokenizer=tokenizer)


def load_model(input_text, model_name_or_path):
    """Classify ``input_text`` with the model at ``model_name_or_path``.

    Despite its name, this function returns a prediction, not a model. The
    expensive model loading is delegated to the cached ``_load_pipeline``;
    only the inference runs on every call.

    Args:
        input_text: Text to classify.
        model_name_or_path: Identifier or path of the fine-tuned classifier.

    Returns:
        The pipeline output for ``input_text``; callers read
        ``result[0]["label"]`` and ``result[0]["score"]``.
    """
    nlp = _load_pipeline(model_name_or_path)
    return nlp(input_text)


def app():
    """Render the Marathi news classifier page.

    Shows a description of the two available models, lets the user pick a
    model (sidebar) and a sample or custom text, and on "Predict" displays the
    predicted label and confidence returned by ``load_model``.
    """
    st.title("मराठी Marathi News Classifier")

    st.markdown(
        "This demo uses the below fine-tuned models for marathi news classification:\n"
        "- [IndicNLP Marathi News Classifier](https://huggingface.co/flax-community/mr-indicnlp-classifier) fine-tuned on "
        "[IndicNLP Marathi News Dataset](https://github.com/ai4bharat/indicnlp_corpus#indicnlp-news-article-classification-dataset)\n"
        "> `IndicNLP` model predicts one of these 3 classes - `['lifestyle', 'entertainment', 'sports']`\n"
        "- [iNLTK Marathi News Classifier](https://huggingface.co/flax-community/mr-inltk-classifier) fine-tuned on "
        "[Marathi News Dataset](https://www.kaggle.com/disisbig/marathi-news-dataset)\n"
        "> `iNLTK` model predicts one of these 3 classes - `['state', 'entertainment', 'sports']`"
    )

    classifier = st.sidebar.selectbox("Select a Model", index=0, options=["IndicNLP", "iNLTK"])

    st.sidebar.markdown(
        "**IndicNLP Classes**\n"
        "- lifestyle\n"
        "- entertainment\n"
        "- sports\n"
        "\n"
        "**iNLTK Classes**\n"
        "- state\n"
        "- entertainment\n"
        "- sports"
    )

    # The last entry is a sentinel that switches the UI to a free-text input.
    sample_texts = [
        "रोहित शर्माने सरावाला सुरुवात करण्यापूर्वी भारतीय खेळाडूला दिला कानमंत्र, म्हणाला...",
        "जॉनी लीवर यांनी नम्रता संभेरावला दिलं खास गिफ्ट, अभिनेत्रीने व्यक्त केल्या भावना",
        "Custom",
    ]
    # Map the selected model name to its path via the module-level config.
    model_name_or_path = cfg["models"][classifier]

    # Default selection is the last option, i.e. "Custom".
    text_to_classify = st.selectbox("Select a Text", options=sample_texts, index=len(sample_texts) - 1)

    if text_to_classify == "Custom":
        text_to_classify = st.text_input("Enter custom text:")

    predict_button = st.button("Predict")

    if predict_button:
        if not text_to_classify.strip():
            # Avoid showing a meaningless prediction for empty input.
            st.warning("Please enter some text to classify.")
        else:
            with st.spinner("Generating prediction..."):
                result = load_model(text_to_classify, model_name_or_path)

                st.markdown("## Predicted Label: `{}`".format(result[0]["label"]))
                # Scale first and format to one decimal; rounding before the
                # multiplication leaks float artefacts such as
                # 28.299999999999997 into the page.
                st.markdown("## Confidence: `{:.1f}`%".format(result[0]["score"] * 100))

    st.markdown("- - -")
    st.markdown(
        "❓ Can't figure out where to get a sample text other than the predefined ones? ❓\n"
        "\n"
        "We have provided Marathi newspaper links (section wise) below. Head over to any section of your choice, "
        "copy any headline and paste below to see if the model is predicting the respective class correctly or not?\n"
        "- [entertainment](https://maharashtratimes.com/entertainment/articlelist/19359255.cms)\n"
        "- [sports](https://maharashtratimes.com/sports/articlelist/2429056.cms)\n"
        "- [lifestyle](https://maharashtratimes.com/lifestyle-news/articlelist/2429025.cms)\n"
        "- [state](https://maharashtratimes.com/maharashtra/articlelist/2429066.cms)\n"
        "> 📒 NOTE: Both models are not trained on above headlines! Feel free to use any headline from any newspaper"
    )