Spaces:

flax-community
/

roberta-base-mr

Runtime error

App Files Files Community

roberta-base-mr / apps /classifier.py

hassiahk

Added a horizontal line

0fedd5e over 3 years ago

raw

history blame

3.77 kB

	import json

	import streamlit as st
	from transformers import AutoTokenizer, RobertaForSequenceClassification, pipeline

	# Load the app configuration once at import time. `cfg["models"]` is looked
	# up by the selected classifier name to obtain the model name or path.
	# The encoding is pinned so decoding does not depend on the host locale.
	with open("config.json", encoding="utf-8") as f:
	    cfg = json.load(f)


	@st.cache(allow_output_mutation=True, show_spinner=False)
	def _load_pipeline(model_name_or_path):
	    """Build the text-classification pipeline for one model.

	    Cached on ``model_name_or_path`` alone, so the tokenizer and model
	    weights are loaded once per model rather than once per input text.
	    """
	    tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
	    model = RobertaForSequenceClassification.from_pretrained(model_name_or_path)
	    return pipeline("text-classification", model=model, tokenizer=tokenizer)


	def load_model(input_text, model_name_or_path):
	    """Classify ``input_text`` with the model at ``model_name_or_path``.

	    Args:
	        input_text: Text to classify.
	        model_name_or_path: Identifier passed to ``from_pretrained``.

	    Returns:
	        The pipeline output for ``input_text``; the caller reads
	        ``result[0]["label"]`` and ``result[0]["score"]``.
	    """
	    # Previously the cache key included the input text, so every new text
	    # reloaded the whole model. Only the pipeline construction is cached now;
	    # inference itself runs on each call.
	    classifier = _load_pipeline(model_name_or_path)
	    return classifier(input_text)


	def app():
	    """Render the Marathi news classifier page.

	    Shows a description of the two available models, lets the user pick a
	    model in the sidebar and a sample or custom text in the main area, and on
	    "Predict" displays the predicted label and confidence returned by
	    ``load_model``. A help section with newspaper links is always rendered
	    at the bottom.
	    """
	    st.title("मराठी Marathi News Classifier")

	    st.markdown(
	        "This demo uses the below fine-tuned models for marathi news classification:\n"
	        "- [IndicNLP Marathi News Classifier](https://huggingface.co/flax-community/mr-indicnlp-classifier) fine-tuned on "
	        "[IndicNLP Marathi News Dataset](https://github.com/ai4bharat/indicnlp_corpus#indicnlp-news-article-classification-dataset)\n"
	        "> `IndicNLP` model predicts one of these 3 classes - `['lifestyle', 'entertainment', 'sports']`\n"
	        "- [iNLTK Marathi News Classifier](https://huggingface.co/flax-community/mr-inltk-classifier) fine-tuned on "
	        "[Marathi News Dataset](https://www.kaggle.com/disisbig/marathi-news-dataset)\n"
	        "> `iNLTK` model predicts one of these 3 classes - `['state', 'entertainment', 'sports']`"
	    )

	    classifier = st.sidebar.selectbox("Select a Model", index=0, options=["IndicNLP", "iNLTK"])

	    st.sidebar.markdown(
	        "IndicNLP Classes\n"
	        "- lifestyle\n"
	        "- entertainment\n"
	        "- sports\n"
	        "\n"
	        "iNLTK Classes\n"
	        "- state\n"
	        "- entertainment\n"
	        "- sports"
	    )

	    sample_texts = [
	        "रोहित शर्माने सरावाला सुरुवात करण्यापूर्वी भारतीय खेळाडूला दिला कानमंत्र, म्हणाला...",
	        "जॉनी लीवर यांनी नम्रता संभेरावला दिलं खास गिफ्ट, अभिनेत्रीने व्यक्त केल्या भावना",
	        "Custom",
	    ]
	    # The config maps the sidebar choice to a model name or path.
	    model_name_or_path = cfg["models"][classifier]

	    # "Custom" is the last option and is preselected.
	    text_to_classify = st.selectbox("Select a Text", options=sample_texts, index=len(sample_texts) - 1)

	    if text_to_classify == "Custom":
	        text_to_classify = st.text_input("Enter custom text:")

	    predict_button = st.button("Predict")

	    if predict_button:
	        if not text_to_classify.strip():
	            # An empty custom box would otherwise be classified as-is and
	            # produce a meaningless label.
	            st.warning("Please enter some text to classify.")
	        else:
	            with st.spinner("Generating prediction..."):
	                result = load_model(text_to_classify, model_name_or_path)

	            # Scale to percent before rounding; rounding first and then
	            # multiplying reintroduces float noise (e.g. 56.699999999999996).
	            confidence = round(result[0]["score"] * 100, 1)

	            st.markdown("## Predicted Label: `{}`".format(result[0]["label"]))
	            st.markdown("## Confidence: `{}`%".format(confidence))

	    st.markdown("- - -")
	    st.markdown(
	        "❓ Can't figure out where to get a sample text other than the predefined ones? ❓\n"
	        "\n"
	        "We have provided Marathi newspaper links (section wise) below. Head over to any section of your choice, "
	        "copy any headline and paste below to see if the model is predicting the respective class correctly or not?\n"
	        "- [entertainment](https://maharashtratimes.com/entertainment/articlelist/19359255.cms)\n"
	        "- [sports](https://maharashtratimes.com/sports/articlelist/2429056.cms)\n"
	        "- [lifestyle](https://maharashtratimes.com/lifestyle-news/articlelist/2429025.cms)\n"
	        "- [state](https://maharashtratimes.com/maharashtra/articlelist/2429066.cms)\n"
	        "> 📒 NOTE: Both models are not trained on above headlines! Feel free to use any headline from any newspaper"
	    )