"""Gradio app that classifies English and Hindi comments as abusive or not."""

import gradio as gr
import joblib
import nltk
import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer

# Local helper modules shipped with this app.
import clean                # text cleaning for English input
import language_detection   # English/Hindi language detection

# WordNet corpus, presumably needed by the lemmatizer in clean.text_cleaning.
nltk.download('wordnet')

print("all imports worked")

# Load the English abuse classifier and its fitted TF-IDF vectorizer.
model = joblib.load('model_joblib.pkl')
print("model loaded")

tf = joblib.load('tf_joblib.pkl')
print("tfidf vectorizer loaded")
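# NOTE (assumption): model_joblib.pkl is taken to be a scikit-learn binary
# classifier (0 = not abusive, 1 = abusive) and tf_joblib.pkl the
# TfidfVectorizer it was trained with; both files must sit next to this script.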
# Hindi input is handled by a fine-tuned MuRIL model from the Hugging Face Hub.
hindi_tokenizer = AutoTokenizer.from_pretrained("Hate-speech-CNERG/hindi-abusive-MuRIL")
hindi_model = AutoModelForSequenceClassification.from_pretrained("Hate-speech-CNERG/hindi-abusive-MuRIL")
print("Hindi model loaded")
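# NOTE (assumption): the label order is taken to be index 0 = not abusive,
# index 1 = abusive, matching the comparison in predict_abusive_lang below;
# verify with hindi_model.config.id2label if in doubt.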
def predict_hindi_text(text):
    """Return softmax probabilities over the Hindi model's classes."""
    inputs = hindi_tokenizer(text, return_tensors="pt", padding=True, truncation=True)
    with torch.no_grad():  # inference only; no gradients needed
        outputs = hindi_model(**inputs)
    predictions = torch.nn.functional.softmax(outputs.logits, dim=-1)
    return predictions[0].numpy()
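# The array returned above holds one probability per class and sums to 1;
# predict_abusive_lang compares the abusive score against the non-abusive one.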
def predict_abusive_lang(text):
    print("original text ", text)

    # Reject empty input before running any model.
    if not text or not text.strip():
        return ["Please write something in the comment box..", "No cleaned text"]

    lang = language_detection.en_hi_detection(text)
    print("language detected ", lang)

    if lang == 'eng':
        cleaned_text = clean.text_cleaning(text)
        print("cleaned text ", cleaned_text)
        features = tf.transform([cleaned_text])
        print("tfidf transformation ", features)
        prediction = model.predict(features)
        print("prediction ", prediction)
        if prediction[0] == 0:
            return ["Not Abusive", cleaned_text]
        return ["Abusive", cleaned_text]
    elif lang == 'hi':
        print("using transformers for Hindi text")
        scores = predict_hindi_text(text)
        if scores[1] > scores[0]:  # abusive score wins
            return ["Abusive", text]
        return ["Not Abusive", text]
    else:
        return ["Unknown Language", "No cleaned text"]
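# Every branch above returns [verdict, displayed_text], matching the two
# output Textbox components declared next.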
output_interfaces = [
    gr.Textbox(label="Result"),
    gr.Textbox(label="Cleaned text")
]

app = gr.Interface(
    fn=predict_abusive_lang,
    inputs='text',
    outputs=output_interfaces,
    title="Abuse Classifier",
    description="Enter a sentence and the model will predict whether it is abusive or not.",
)

if __name__ == "__main__":
    app.launch()
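# launch() serves the UI locally (default http://127.0.0.1:7860); passing
# share=True would generate a temporary public link (standard Gradio options).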