Kaala741 committed on
Commit
8a86e97
·
verified ·
1 Parent(s): 43a54e0

Upload 7 files

Browse files
__pycache__/model.cpython-312.pyc ADDED
Binary file (1.78 kB). View file
 
__pycache__/preprocessing.cpython-312.pyc ADDED
Binary file (2.3 kB). View file
 
app.py ADDED
@@ -0,0 +1,72 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from preprocessing import preprocess_text
3
+ from model import load_model,predict
4
+ from streamlit_lottie import st_lottie
5
+ import requests
6
+
7
+
8
def load_lottieurl(url: str):
    """Download a Lottie animation definition from ``url``.

    Args:
        url: HTTP(S) address of a Lottie JSON document.

    Returns:
        The decoded JSON payload, or ``None`` when the server responds with
        any status code other than 200.

    Raises:
        requests.RequestException: If the request itself fails (connection
            error, timeout, ...).
    """
    # Bound the wait so an unresponsive host cannot block the caller forever.
    r = requests.get(url, timeout=10)
    if r.status_code != 200:
        # Previously this returned ``new_func()``, a name that is never
        # defined, so every failed download surfaced as a NameError.
        return None
    return r.json()
13
+
14
def load_lottiefile(filepath: str):
    """Read a Lottie animation definition from a local JSON file.

    Args:
        filepath: Path to a UTF-8 encoded JSON file.

    Returns:
        The object decoded from the file's JSON content.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    # Imported locally: the module's import block does not bring ``json``
    # into scope, so the original body failed with NameError when called.
    import json

    with open(filepath, encoding="utf8") as f:
        return json.load(f)
18
+
19
+ # Load the model and tokenizer
20
+ model_path = 'Model/bert_classifier.h5'
21
+ model, tokenizer = load_model(model_path)
22
+
23
+ lottie_coding = load_lottieurl("https://lottie.host/05d8286a-86bb-483e-a845-5715aca4241e/hrUyVR89oh.json")
24
+ lottie_coding2 = load_lottieurl("https://lottie.host/5735c629-79f5-4312-b4e4-268709e0eaab/D4OjpsayFs.json")
25
+
26
+ # Streamlit UI
27
+ st.set_page_config(layout="wide",page_title="Hate-speech-Detection",page_icon=":computer:")
28
+ st.markdown("<h1 style='text-align:center;color:white;'>Welcome To Our Website !</h1>", unsafe_allow_html=True)
29
+ #---Main Section---
30
+ with st.container():
31
+ st.header('Automated Hate Speech Detection')
32
+ input_text = st.text_area('Enter text to analyze', '')
33
+ if st.button('Analyze'):
34
+ preprocessed_text = preprocess_text(input_text)
35
+ classification = predict(preprocessed_text, model, tokenizer)
36
+ st.write("Classification:", classification)
37
+ st.write("---")
38
+ #---Header Section---
39
+ with st.container():
40
+ left_column,right_column=st.columns(2)
41
+ with left_column:
42
+ st.header("ABOUT")
43
+ st.write(
44
+ """
45
+ This website employs an automated system to detect and manage harmful content that incites hatred, violence, or discrimination against individuals or groups based on characteristics such as race, ethnicity, gender, sexual orientation, or religion. By using machine learning algorithms and natural language processing, the system analyzes user-generated comments and posts on the Reddit platform to identify and flag potentially offensive or harmful language
46
+ """
47
+ )
48
+ with right_column:
49
+ st_lottie(lottie_coding, height=400, key="coding")
50
+ # --- FORM SECTION ---
51
+ with st.container():
52
+ st.write("---")
53
+ st.header("Get In Touch With Me!")
54
+ contact_form = """
55
+ <form action="https://formsubmit.co/[email protected]" method="POST">
56
+ <input type="hidden" name="_captcha" value="false">
57
+ <input type="text" name="name" placeholder="Your name" required>
58
+ <input type="email" name="email" placeholder="Your email" required>
59
+ <textarea name="message" placeholder="Your message here" required></textarea>
60
+ <button type="submit">Send</button>
61
+ </form>
62
+ """
63
def local_css(file_name):
    """Inject the contents of a CSS file into the page as a ``<style>`` tag.

    Args:
        file_name: Path to the stylesheet to embed.

    Raises:
        OSError: If the stylesheet cannot be opened.
    """
    # Pin the encoding so the stylesheet decodes the same way on every
    # platform instead of depending on the locale's default codec.
    with open(file_name, encoding="utf-8") as f:
        st.markdown(f"<style>{f.read()}</style>", unsafe_allow_html=True)
66
+ local_css("AutomatedHateSpeech/form.css")
67
+
68
+ left_column, right_column = st.columns(2)
69
+ with left_column:
70
+ st.markdown(contact_form, unsafe_allow_html=True)
71
+ with right_column:
72
+ st_lottie(lottie_coding2, height=300,key="anime")
chat_words.json ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "AFAIK": "As Far As I Know",
3
+ "AFK": "Away From Keyboard",
4
+ "ASAP": "As Soon As Possible",
5
+ "ATK": "At The Keyboard",
6
+ "ATM": "At The Moment",
7
+ "A3": "Anytime, Anywhere, Anyplace",
8
+ "BAK": "Back At Keyboard",
9
+ "BBL": "Be Back Later",
10
+ "BBS": "Be Back Soon",
11
+ "BFN": "Bye For Now",
12
+ "B4N": "Bye For Now",
13
+ "BRB": "Be Right Back",
14
+ "BRT": "Be Right There",
15
+ "BTW": "By The Way",
16
+ "B4": "Before",
17
+ "CU": "See You",
18
+ "CUL8R": "See You Later",
19
+ "CYA": "See You",
20
+ "FAQ": "Frequently Asked Questions",
21
+ "FC": "Fingers Crossed",
22
+ "FWIW": "For What Its Worth",
23
+ "FYI": "For Your Information",
24
+ "GAL": "Get A Life",
25
+ "GG": "Good Game",
26
+ "GN": "Good Night",
27
+ "GMTA": "Great Minds Think Alike",
28
+ "GR8": "Great!",
29
+ "G9": "Genius",
30
+ "IC": "I See",
31
+ "ICQ": "I Seek you (also a chat program)",
32
+ "ILU": "I Love You",
33
+ "IMHO": "In My Honest/Humble Opinion",
34
+ "IMO": "In My Opinion",
35
+ "IOW": "In Other Words",
36
+ "IRL": "In Real Life",
37
+ "KISS": "Keep It Simple, Stupid",
38
+ "LDR": "Long Distance Relationship",
39
+ "LMAO": "Laugh My A.. Off",
40
+ "LOL": "Laughing Out Loud",
41
+ "LTNS": "Long Time No See",
42
+ "L8R": "Later",
43
+ "MTE": "My Thoughts Exactly",
44
+ "M8": "Mate",
45
+ "NRN": "No Reply Necessary",
46
+ "OIC": "Oh I See",
47
+ "PITA": "Pain In The A..",
48
+ "PRT": "Party",
49
+ "PRW": "Parents Are Watching",
50
+ "QPSA": "Que Pasa?",
51
+ "ROFL": "Rolling On The Floor Laughing",
52
+ "ROFLOL": "Rolling On The Floor Laughing Out Loud",
53
+ "ROTFLMAO": "Rolling On The Floor Laughing My A.. Off",
54
+ "SK8": "Skate",
55
+ "STATS": "Your sex and age",
56
+ "ASL": "Age, Sex, Location",
57
+ "THX": "Thank You",
58
+ "TTFN": "Ta-Ta For Now!",
59
+ "TTYL": "Talk To You Later",
60
+ "U": "You",
61
+ "U2": "You Too",
62
+ "U4E": "Yours For Ever",
63
+ "WB": "Welcome Back",
64
+ "WTF": "What The F...",
65
+ "WTG": "Way To Go!",
66
+ "WUF": "Where Are You From?",
67
+ "W8": "Wait...",
68
+ "7K": "Sick"
69
+ }
form.css ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /* CSS Snippet from W3schools: https://www.w3schools.com/howto/howto_css_contact_form.asp */
2
+ /* Style inputs with type="text", select elements and textareas */
3
+ input[type=message], input[type=email], input[type=text], textarea {
4
+ width: 100%; /* Full width */
5
+ padding: 12px; /* Some padding */
6
+ border: 1px solid #ccc; /* Gray border */
7
+ border-radius: 4px; /* Rounded borders */
8
+ box-sizing: border-box; /* Make sure that padding and width stays in place */
9
+ margin-top: 6px; /* Add a top margin */
10
+ margin-bottom: 16px; /* Bottom margin */
11
+ resize: vertical /* Allow the user to vertically resize the textarea (not horizontally) */
12
+ }
13
+
14
+ /* Style the submit button with a specific background color etc */
15
+ button[type=submit] {
16
+ background-color: #04AA6D;
17
+ color: white;
18
+ padding: 12px 20px;
19
+ border: none;
20
+ border-radius: 4px;
21
+ cursor: pointer;
22
+ }
23
+
24
+ /* When moving the mouse over the submit button, add a darker green color */
25
+ button[type=submit]:hover {
26
+ background-color: #45a049;
27
+ }
28
+
29
+
30
+ /* Hide Streamlit Branding */
31
+ #MainMenu {visibility: hidden;}
32
+ footer {visibility: hidden;}
33
+ header {visibility: hidden;}
number_meanings.json ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "111": "Alignment and manifestation",
3
+ "1111": "New beginnings and unity",
4
+ "222": "Balance and harmony",
5
+ "333": "Protection and encouragement",
6
+ "444": "Foundation and grounding",
7
+ "555": "Change and transformation",
8
+ "666": "Reflection and personal development",
9
+ "777": "Spiritual growth and good fortune",
10
+ "888": "Abundance and success",
11
+ "999": "Completion and closure",
12
+ "1010": "Personal development and growth",
13
+ "1212": "Spiritual awakening and higher consciousness",
14
+ "1234": "Progress and moving forward",
15
+ "2222": "Peace and balance in life",
16
+ "3333": "Divine protection and guidance",
17
+ "4444": "Strong foundation and support",
18
+ "5555": "Major life changes",
19
+ "6666": "Reflecting on past choices",
20
+ "7777": "Luck and spiritual awareness",
21
+ "8888": "Financial abundance and prosperity",
22
+ "9999": "Endings leading to new beginnings",
23
+ "1001": "Self-improvement and new perspectives",
24
+ "1101": "Opening new doors and opportunities",
25
+ "1211": "Embracing new phases in life",
26
+ "1233": "Balanced progress and harmony",
27
+ "1441": "Foundation and new opportunities",
28
+ "1515": "Personal growth and freedom",
29
+ "1616": "Stability and inner strength",
30
+ "1717": "Awakening and self-awareness",
31
+ "1818": "Financial success and independence",
32
+ "1919": "Completion of cycles and new beginnings",
33
+ "2020": "Vision and clarity",
34
+ "2121": "Positive change and new opportunities",
35
+ "2323": "Creativity and self-expression",
36
+ "2424": "Building strong foundations",
37
+ "2525": "Adaptability and positive change",
38
+ "2626": "Nurturing and care for loved ones",
39
+ "2727": "Spiritual awareness and inner wisdom",
40
+ "2828": "Abundance and prosperity",
41
+ "2929": "Transformation and new phases",
42
+ "3030": "Creativity and communication",
43
+ "3131": "Optimism and positive energy",
44
+ "3232": "Faith and trust in the universe",
45
+ "3434": "Stability and determination",
46
+ "3535": "Personal freedom and individuality",
47
+ "3636": "Nurturing and compassion",
48
+ "3737": "Spiritual growth and awareness",
49
+ "3838": "Ab"
50
+ }
preprocessing.py ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import re
2
+ import json
3
+ import string
4
+ import emoji
5
+ from nltk.tokenize import word_tokenize
6
+ from nltk.stem import WordNetLemmatizer
7
+ # Load chat words and number meanings if needed
8
+ with open(r'chat_words.json','r') as f:
9
+ chat_words = json.load(f)
10
+
11
+ with open(r'number_meanings.json','r') as f:
12
+ number_meanings = json.load(f)
13
+
14
+ # Initialize lemmatizer
15
+ lemmatizer = WordNetLemmatizer()
16
def preprocess_text(text):
    """Clean a raw string and return it as space-joined, lemmatized tokens.

    The steps run in this order: lowercase, collapse whitespace, remove HTML
    tags, remove URLs, expand chat abbreviations found in ``chat_words``,
    convert emojis to text, strip ``string.punctuation`` characters,
    tokenize, lemmatize every token as a verb, and join with single spaces.

    Args:
        text: The string to process.

    Returns:
        The processed string.
    """
    # Lowercase and squeeze every whitespace run down to one space.
    normalized = ' '.join(text.lower().split())

    # Drop anything that looks like an HTML tag, then any URL.
    without_tags = re.sub(r'<.*?>', '', normalized)
    without_urls = re.sub(r'https?://\S+|www\.\S+', '', without_tags)

    # Abbreviations are looked up by their upper-cased form; words with no
    # entry pass through unchanged.
    # NOTE(review): the expansion text is inserted exactly as stored in
    # chat_words, after the lowercasing step, so the output is not
    # necessarily all lower-case -- confirm this is what the model expects.
    expanded = " ".join(
        chat_words.get(token.upper(), token) for token in without_urls.split()
    )

    # Emojis become text with no delimiter characters around them.
    described = emoji.demojize(expanded, delimiters=("", ""))

    # Delete every character listed in string.punctuation.
    punctuation_table = str.maketrans('', '', string.punctuation)
    stripped = described.translate(punctuation_table)

    # Tokenize, reduce each token to its verb lemma, and re-join.
    lemmas = (
        lemmatizer.lemmatize(token, pos='v') for token in word_tokenize(stripped)
    )
    return ' '.join(lemmas)