Spaces:

HamidRezaei
/

Offensive-Detection-Lora

Sleeping

App Files Files Community

HamidRezaei committed on Nov 5, 2024

Commit

bcb1984

verified ·

1 Parent(s): aec4a54

Create app.py

Browse files

Files changed (1) hide show

app.py +135 -0

app.py ADDED Viewed

	@@ -0,0 +1,135 @@

+Hugging Face's logo
+Hugging Face
+Search models, datasets, users...
+Models
+Datasets
+Spaces
+Posts
+Docs
+Solutions
+Pricing
+Spaces:
+Asa-AI-Lab
+/
+Offensive-Detection-Space
+private
+Logs
+App
+Files
+Community
+Settings
+Offensive-Detection-Space
+/
+app.py
+hafez97's picture
+hafez97
+Update app.py
+b244916
+verified
+13 days ago
+raw
+Copy download link
+history
+blame
+edit
+delete
+2.96 kB
+import streamlit as st
+from transformers import AutoTokenizer, AutoModelForSequenceClassification
+import os
+import torch
+from cleantext import clean
+import hazm
+import re
# One HTML-like tag: '<', everything up to the next '>', then '>'.
# A negated character class is used instead of '.*?' because '.' stops at
# newlines, which left tags that were wrapped across lines untouched.
_HTML_TAG_RE = re.compile(r"<[^>]*>")


def cleanhtml(raw_html):
    """Return ``raw_html`` with every ``<...>`` tag removed.

    Only the tag markup itself is deleted; the text between tags is kept
    as-is. A lone ``<`` or ``>`` without a matching partner is left alone.

    Args:
        raw_html: String that may contain HTML-like tags.

    Returns:
        The input string with all ``<...>`` spans stripped out.
    """
    return _HTML_TAG_RE.sub("", raw_html)
# Characters deleted outright by `cleaning`: emoji, pictographs, assorted
# symbol ranges and bidirectional/joiner control marks.
# U+200C (zero-width non-joiner) is intentionally absent from this class (it
# was commented out in the original pattern) -- presumably because Persian
# orthography relies on it; confirm before adding it back.
_SYMBOL_NOISE_RE = re.compile("["
                              "\U0001F600-\U0001F64F"  # emoticons
                              "\U0001F300-\U0001F5FF"  # symbols & pictographs
                              "\U0001F680-\U0001F6FF"  # transport & map symbols
                              "\U0001F1E0-\U0001F1FF"  # flags (iOS)
                              "\U00002702-\U000027B0"
                              "\U000024C2-\U0001F251"
                              "\U0001f926-\U0001f937"
                              "\U00010000-\U0010ffff"
                              "\u200d"
                              "\u2640-\u2642"
                              "\u2600-\u2B55"
                              "\u23cf"
                              "\u23e9"
                              "\u231a"
                              "\u3030"
                              "\ufe0f"
                              "\u2069"
                              "\u2066"
                              "\u2068"
                              "\u2067"
                              "]+", flags=re.UNICODE)


def cleaning(text):
    """Normalize raw user text before it is tokenized for the classifier.

    Steps, in order: strip surrounding whitespace, run cleantext's ``clean``
    with all options enabled, remove HTML-like tags via ``cleanhtml``, apply
    ``hazm.Normalizer``, delete emoji / symbol / bidi-control characters,
    drop ``#`` characters and collapse every whitespace run to one space.

    Args:
        text: Raw input string.

    Returns:
        The cleaned string. Whitespace is only stripped at the very start of
        the pipeline, so the result may still begin or end with one space.
    """
    text = text.strip()

    # Generic cleanup provided by the cleantext package.
    # NOTE(review): this runs with punct=True *before* cleanhtml(); if it
    # strips '<' and '>' the tag removal below can no longer match anything.
    # The order is kept unchanged so preprocessing stays identical to what the
    # model has been served with -- confirm whether it should be swapped.
    text = clean(text,
                 clean_all=True,
                 punct=True,
                 stopwords=True,
                 stemming=True,
                 extra_spaces=True
                 )

    # Remove HTML-like tags.
    text = cleanhtml(text)

    # Persian-specific normalization.
    normalizer = hazm.Normalizer()
    text = normalizer.normalize(text)

    # Remove emoji and other unwanted symbol/control characters.
    text = _SYMBOL_NOISE_RE.sub('', text)

    # Drop hashtag markers, then collapse whitespace runs. The pattern is a
    # raw string: "\s" in a plain literal is an invalid escape sequence.
    text = text.replace("#", "")
    text = re.sub(r"\s+", " ", text)
    return text
# Hugging Face repository that provides both the tokenizer and the classifier.
MODEL_ID = "HamidRezaei/Persian-Offensive-Language-Detection-Lora"

# Hub token read from the environment; None when ACCESS_TOKEN is not set.
access_token = os.getenv('ACCESS_TOKEN')


@st.cache_resource
def _load_classifier(model_id, token):
    """Load the tokenizer and sequence-classification model for ``model_id``.

    Streamlit re-executes the whole script on every widget interaction;
    ``st.cache_resource`` keeps the loaded objects alive across those reruns
    so they are not loaded again for each submitted message.

    Args:
        model_id: Hugging Face repository id to load from.
        token: Hub access token, or None.

    Returns:
        A ``(tokenizer, model)`` tuple.
    """
    loaded_tokenizer = AutoTokenizer.from_pretrained(model_id, token=token)
    loaded_model = AutoModelForSequenceClassification.from_pretrained(model_id, token=token)
    return loaded_tokenizer, loaded_model


tokenizer, model = _load_classifier(MODEL_ID, access_token)

st.title("Offensive or Not?")
prompt = st.text_area(label="Send a message")
# The return value is not consulted: the result below is rendered on any
# rerun in which the text area is non-empty.
button = st.button("send")

if prompt:
    normalized_prompt = cleaning(prompt)
    # truncation=True keeps over-long messages within the tokenizer's
    # configured maximum length instead of passing them through uncut.
    encoding = tokenizer(normalized_prompt, return_tensors="pt", truncation=True)
    encoding = {name: tensor.to(model.device) for name, tensor in encoding.items()}
    # Inference only: skip autograd bookkeeping.
    with torch.no_grad():
        logits = model(**encoding).logits
    # Sigmoid + 0.5 threshold.
    # NOTE(review): .item() requires exactly one logit per input, i.e. a
    # single-output classification head -- confirm against the model config.
    score = torch.sigmoid(logits.squeeze().cpu()).item()
    st.markdown(f"Offensive: score {score}" if score > 0.5 else f"Not Offensive: score {score}")