Spaces:

IoriU
/

HateSpeechPredictor

Sleeping

App Files Files Community

hk-bt-rnd committed on Mar 28

Commit

ad5ee12

•

1 Parent(s): c5550af

Init space

Browse files

Files changed (5) hide show

__pycache__/model.cpython-310.pyc +0 -0
app.py +87 -0
model.py +37 -0
requirements.txt +76 -0
weight.pt +3 -0

__pycache__/model.cpython-310.pyc ADDED Viewed

Binary file (1.24 kB). View file

app.py ADDED Viewed

	@@ -0,0 +1,87 @@

+import gradio as gr
+import numpy as np
+from PIL import Image
+from matplotlib import cm
+import torch
+from transformers import AutoTokenizer, AutoModel, AutoConfig
+from model import Classifier
+import torch.nn as nn
+import torch.nn.functional as F
+# Load model directly
+MODEL_NAME = "cahya/roberta-base-indonesian-522M"
+device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
+class_names = ['Action', 'Adventure', 'Comedy', 'Drama', 'Fantasy', 'Romance', 'Sci-Fi']
+config = AutoConfig.from_pretrained(MODEL_NAME)
+transformer = AutoModel.from_pretrained(MODEL_NAME)
+tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
+cp = torch.load(r"weight.pt", map_location="cpu")
+transformer.load_state_dict(cp['w_t'])
+classifier = Classifier(input_size = config.hidden_size, output_sizes = [1, 1, 1, 3, 5])
+classifier.load_state_dict(cp['w_c'])
+transformer.to(device)
+classifier.to(device)
+target_names = ["Individual", 'Group']
+strength_names = ["Weak", 'Moderate', 'Strong']
+type_names = ['Religion','Race','Physical','Gender','Other']
+act_sig = nn.Sigmoid()
+act_soft = nn.Softmax()
+def predict(sentence):
+    # Tokenize the input sentence
+    inputs = tokenizer(sentence,
+                        add_special_tokens = True, \
+                        max_length = 256, \
+                        padding = "max_length", \
+                        truncation = True,
+                        return_tensors='pt')
+    input_ids = inputs['input_ids'].to(device)
+    att_masks = inputs['attention_mask'].to(device)
+    # Get model predictions
+    with torch.no_grad():
+        out = transformer(input_ids, attention_mask=att_masks)
+        logits = out.pooler_output
+        out = classifier(logits)
+        hs_out, abusive_out, target_out, strength_out, type_out = out[0], out[1], out[2], out[3], out[4]
+        hs_act, abusive_act, target_act, strength_act, type_act = act_sig(hs_out).squeeze(), \
+            act_sig(abusive_out).squeeze(), act_sig(target_out).squeeze(0), act_soft(strength_out), act_sig(type_out).squeeze(0)
+    # Interpret the predictions
+    is_hate_speech = bool(hs_act >= 0.5)
+    is_abusive = bool(abusive_act >= 0.5)
+    hate_speech_target = int(target_act >= 0.5)
+    hate_speech_strength = strength_act.argmax().item()
+    if is_hate_speech:
+        hate_speech_target_label = target_names[hate_speech_target]
+        hate_speech_strength_label = strength_names[hate_speech_strength]
+        hate_speech_type_label = []
+        print('target', target_act)
+        print('strength', strength_act)
+        for idx, prob in enumerate(type_act):
+            if prob >= 0.5:
+                hate_speech_type_label.append(type_names[idx])
+        if len(hate_speech_type_label) == 0:
+            hate_speech_type_label.append("Other")
+    else:
+        hate_speech_target_label = "Non-HS"
+        hate_speech_strength_label = "Non-HS"
+        hate_speech_type_label = "Non-HS"
+    return is_hate_speech, is_abusive, hate_speech_target_label, hate_speech_strength_label, {"hs_type":hate_speech_type_label}
+# Create the Gradio interface
+iface = gr.Interface(fn=predict, inputs=gr.Textbox(label="Enter a sentence"), outputs=[
+    gr.Label(label="Is Hate Speech"),
+    gr.Label(label="Is Abusive"),
+    gr.Label(label="Hate Speech Target"),
+    gr.Label(label="Hate Speech Strength"),
+    gr.JSON(label="Hate Speech Type")
+], title="Hate Speech Detection")
+iface.launch()  # Launches the mini app!

model.py ADDED Viewed

	@@ -0,0 +1,37 @@

+import torch.nn as nn
+import torchvision.models as models
+import torch
+from transformers import AutoTokenizer, AutoModel, AutoConfig
+class Classifier(nn.Module):
+    def __init__(self, input_size = 512, output_sizes = [1], dropout_rate = 0.1):
+        super(Classifier, self).__init__()
+        self.hs_head = nn.Sequential(
+            nn.Dropout(dropout_rate),
+            nn.Linear(input_size, output_sizes[0])
+        )
+        self.abusive_head = nn.Sequential(
+            nn.Dropout(dropout_rate),
+            nn.Linear(input_size, output_sizes[1])
+        )
+        self.target_head = nn.Sequential(
+            nn.Dropout(dropout_rate),
+            nn.Linear(input_size, output_sizes[2])
+        )
+        self.strength_head = nn.Sequential(
+            nn.Dropout(dropout_rate),
+            nn.Linear(input_size, output_sizes[3])
+        )
+        self.type_head = nn.Sequential(
+            nn.Dropout(dropout_rate),
+            nn.Linear(input_size, output_sizes[4])
+        )
+    def forward(self, input):
+        return self.hs_head(input), self.abusive_head(input), self.target_head(input), \
+            self.strength_head(input), self.type_head(input)

requirements.txt ADDED Viewed

	@@ -0,0 +1,76 @@

+aiofiles==23.2.1
+altair==5.2.0
+annotated-types==0.6.0
+anyio==4.3.0
+attrs==23.2.0
+certifi==2024.2.2
+charset-normalizer==3.3.2
+click==8.1.7
+colorama==0.4.6
+contourpy==1.2.0
+cycler==0.12.1
+exceptiongroup==1.2.0
+fastapi==0.110.0
+ffmpy==0.3.2
+filelock==3.13.1
+fonttools==4.50.0
+fsspec==2024.3.1
+gradio==4.22.0
+gradio_client==0.13.0
+h11==0.14.0
+httpcore==1.0.4
+httpx==0.27.0
+huggingface-hub==0.21.4
+idna==3.6
+importlib_resources==6.3.2
+Jinja2==3.1.3
+jsonschema==4.21.1
+jsonschema-specifications==2023.12.1
+kiwisolver==1.4.5
+markdown-it-py==3.0.0
+MarkupSafe==2.1.5
+matplotlib==3.8.3
+mdurl==0.1.2
+mpmath==1.3.0
+networkx==3.2.1
+numpy==1.26.4
+orjson==3.9.15
+packaging==24.0
+pandas==2.2.1
+pillow==10.2.0
+pydantic==2.6.4
+pydantic_core==2.16.3
+pydub==0.25.1
+Pygments==2.17.2
+pyparsing==3.1.2
+python-dateutil==2.9.0.post0
+python-multipart==0.0.9
+pytz==2024.1
+PyYAML==6.0.1
+referencing==0.34.0
+regex==2023.12.25
+requests==2.31.0
+rich==13.7.1
+rpds-py==0.18.0
+ruff==0.3.3
+safetensors==0.4.2
+semantic-version==2.10.0
+shellingham==1.5.4
+six==1.16.0
+sniffio==1.3.1
+starlette==0.36.3
+sympy==1.12
+tokenizers==0.15.2
+tomlkit==0.12.0
+toolz==0.12.1
+torch==2.2.1
+torchaudio==2.2.1
+torchvision==0.17.1
+tqdm==4.66.2
+transformers==4.38.2
+typer==0.9.0
+typing_extensions==4.10.0
+tzdata==2024.1
+urllib3==2.2.1
+uvicorn==0.29.0
+websockets==11.0.3

weight.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:eb083732f7dd150113bba50f7f5125a4d3b83adf98db912d64394d38d9290e1b
+size 504022203