# Source: Hugging Face Space "Asa-AI-Lab / Offensive-Detection-Space", app.py
# (revision b244916).
"""Streamlit app that scores a message with an offensive-language classifier.

The message typed by the user is cleaned with ``cleaning``, tokenized, passed
through a sequence-classification model, and the sigmoid of the resulting
logit is shown as the "offensive" score (threshold 0.5).
"""
import os
import re

import hazm
import streamlit as st
import torch
from cleantext import clean
from transformers import AutoModelForSequenceClassification, AutoTokenizer

MODEL_ID = "HamidRezaei/Persian-Offensive-Language-Detection-Lora"

# Patterns and the normalizer are built once instead of on every call.
_HTML_TAG_RE = re.compile(r"<.*?>")
_WHITESPACE_RE = re.compile(r"\s+")
_NORMALIZER = hazm.Normalizer()

# Emoji, pictographs, directional marks and similar symbols to strip.
# U+200C (zero-width non-joiner) was commented out of the original class and
# is therefore deliberately NOT removed here.
# NOTE(review): presumably kept because ZWNJ is meaningful in Persian
# spelling -- confirm.
_SYMBOL_RE = re.compile(
    "["
    "\U0001F600-\U0001F64F"  # emoticons
    "\U0001F300-\U0001F5FF"  # symbols & pictographs
    "\U0001F680-\U0001F6FF"  # transport & map symbols
    "\U0001F1E0-\U0001F1FF"  # flags (iOS)
    "\U00002702-\U000027B0"
    "\U000024C2-\U0001F251"
    "\U0001f926-\U0001f937"
    "\U00010000-\U0010ffff"
    "\u200d"
    "\u2640-\u2642"
    "\u2600-\u2B55"
    "\u23cf"
    "\u23e9"
    "\u231a"
    "\u3030"
    "\ufe0f"
    "\u2069"
    "\u2066"
    "\u2068"
    "\u2067"
    "]+",
    flags=re.UNICODE,
)


def cleanhtml(raw_html: str) -> str:
    """Return *raw_html* with every ``<...>`` span removed (non-greedy)."""
    return _HTML_TAG_RE.sub("", raw_html)


def cleaning(text: str) -> str:
    """Normalize a raw user message before it is tokenized.

    Steps, in order: strip surrounding whitespace, run ``cleantext.clean``,
    drop HTML tags, apply the hazm normalizer, remove emoji/symbol
    characters, remove ``#`` and collapse whitespace runs to a single space.

    Args:
        text: The raw message.

    Returns:
        The cleaned message.
    """
    text = text.strip()

    # Library cleaning pass; flags are passed exactly as the app always has.
    text = clean(
        text,
        clean_all=True,
        punct=True,
        stopwords=True,
        stemming=True,
        extra_spaces=True,
    )

    # NOTE(review): this runs after clean(punct=True). If that call already
    # strips "<" and ">", no tag can match here. The order is left unchanged
    # so inference preprocessing stays as it was -- confirm against the
    # preprocessing used when the model was trained.
    text = cleanhtml(text)

    text = _NORMALIZER.normalize(text)
    text = _SYMBOL_RE.sub("", text)

    # Drop hashtag markers and collapse whitespace runs.
    text = text.replace("#", "")
    return _WHITESPACE_RE.sub(" ", text)


@st.cache_resource
def _load_tokenizer_and_model():
    """Load the tokenizer and classifier once per server process.

    Streamlit re-executes this script on every widget interaction; without
    caching, both objects would be re-created on each rerun.
    """
    access_token = os.getenv("ACCESS_TOKEN")
    loaded_tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, token=access_token)
    loaded_model = AutoModelForSequenceClassification.from_pretrained(
        MODEL_ID, token=access_token
    )
    return loaded_tokenizer, loaded_model


tokenizer, model = _load_tokenizer_and_model()

st.title("Offensive or Not?")
prompt = st.text_area(label="Send a message")
# The button's return value is not consulted: clicking it only triggers a
# rerun, and scoring below is gated on the text area being non-empty.
st.button("send")

if prompt:
    normalized_prompt = cleaning(prompt)

    # truncation=True keeps long messages within the model's maximum length.
    encoding = tokenizer(normalized_prompt, return_tensors="pt", truncation=True)
    encoding = {name: tensor.to(model.device) for name, tensor in encoding.items()}

    # Inference only: no autograd graph is needed.
    with torch.no_grad():
        logits = model(**encoding).logits

    # Sigmoid + 0.5 threshold.
    # NOTE(review): .item() assumes the model emits exactly one logit per
    # input -- confirm against the model config.
    score = torch.sigmoid(logits.squeeze().cpu()).item()

    st.markdown(
        f"Offensive: score {score}" if score > 0.5 else f"Not Offensive: score {score}"
    )