"""Gradio app that classifies English and Hindi comments as abusive or not."""

import gradio as gr
import joblib
import nltk
import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer

# Local helper modules shipped with this app.
import clean                # text cleaning for English input
import language_detection   # English/Hindi language detection

# WordNet corpus, presumably needed by the lemmatizer in clean.text_cleaning.
nltk.download('wordnet')

print("all imports worked")

# Load the English abuse classifier and its fitted TF-IDF vectorizer.
model = joblib.load('model_joblib.pkl')
print("model loaded")

tf = joblib.load('tf_joblib.pkl')
print("tfidf vectorizer loaded")
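# NOTE (assumption): model_joblib.pkl is taken to be a scikit-learn binary
# classifier (0 = not abusive, 1 = abusive) and tf_joblib.pkl the
# TfidfVectorizer it was trained with; both files must sit next to this script.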
# Hindi input is handled by a fine-tuned MuRIL model from the Hugging Face Hub.
hindi_tokenizer = AutoTokenizer.from_pretrained("Hate-speech-CNERG/hindi-abusive-MuRIL")
hindi_model = AutoModelForSequenceClassification.from_pretrained("Hate-speech-CNERG/hindi-abusive-MuRIL")
print("Hindi model loaded")
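# NOTE (assumption): the label order is taken to be index 0 = not abusive,
# index 1 = abusive, matching the comparison in predict_abusive_lang below;
# verify with hindi_model.config.id2label if in doubt.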
def predict_hindi_text(text):
    """Return softmax probabilities over the Hindi model's classes."""
    inputs = hindi_tokenizer(text, return_tensors="pt", padding=True, truncation=True)
    with torch.no_grad():  # inference only; no gradients needed
        outputs = hindi_model(**inputs)
    predictions = torch.nn.functional.softmax(outputs.logits, dim=-1)
    return predictions[0].numpy()
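# The array returned above holds one probability per class and sums to 1;
# predict_abusive_lang compares the abusive score against the non-abusive one.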
def predict_abusive_lang(text):
    print("original text ", text)

    # Reject empty input before running any model.
    if not text or not text.strip():
        return ["Please write something in the comment box..", "No cleaned text"]

    lang = language_detection.en_hi_detection(text)
    print("language detected ", lang)

    if lang == 'eng':
        cleaned_text = clean.text_cleaning(text)
        print("cleaned text ", cleaned_text)
        features = tf.transform([cleaned_text])
        print("tfidf transformation ", features)
        prediction = model.predict(features)
        print("prediction ", prediction)
        if prediction[0] == 0:
            return ["Not Abusive", cleaned_text]
        return ["Abusive", cleaned_text]
    elif lang == 'hi':
        print("using transformers for Hindi text")
        scores = predict_hindi_text(text)
        if scores[1] > scores[0]:  # abusive score wins
            return ["Abusive", text]
        return ["Not Abusive", text]
    else:
        return ["Unknown Language", "No cleaned text"]
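# Every branch above returns [verdict, displayed_text], matching the two
# output Textbox components declared next.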
output_interfaces = [
    gr.Textbox(label="Result"),
    gr.Textbox(label="Cleaned text")
]

app = gr.Interface(
    fn=predict_abusive_lang,
    inputs='text',
    outputs=output_interfaces,
    title="Abuse Classifier",
    description="Enter a sentence and the model will predict whether it is abusive or not.",
)

if __name__ == "__main__":
    app.launch()
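# launch() serves the UI locally (default http://127.0.0.1:7860); passing
# share=True would generate a temporary public link (standard Gradio options).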