Spaces:
Sleeping
Sleeping
Commit
·
e8afe32
1
Parent(s):
d9f8f61
Upload 14 files
Browse files- .gitattributes +1 -0
- Procfile +1 -0
- README.md +10 -10
- Spam SMS Classifier - Deployment.py +66 -0
- Spam SMS Collection +0 -0
- app.py +25 -0
- cv-transform.pkl +3 -0
- requirements.txt +11 -0
- spam-sms-mnb-model.pkl +3 -0
- static/not-spam.webp +0 -0
- static/spam-favicon.ico +0 -0
- static/spam.webp +3 -0
- static/styles.css +126 -0
- templates/home.html +40 -0
- templates/result.html +43 -0
.gitattributes
CHANGED
@@ -32,3 +32,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
32 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
33 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
34 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
32 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
33 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
34 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
35 |
+
static/spam.webp filter=lfs diff=lfs merge=lfs -text
|
Procfile
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
web: gunicorn app:app
|
README.md
CHANGED
@@ -1,10 +1,10 @@
|
|
1 |
-
|
2 |
-
|
3 |
-
|
4 |
-
|
5 |
-
|
6 |
-
|
7 |
-
|
8 |
-
|
9 |
-
|
10 |
-
|
|
|
1 |
+
# Spam SMS Classification - Deployment
|
2 |
+
  
|
3 |
+
|
4 |
+
• This repository contains the files required to deploy a ___Machine Learning Web App___ created with ___Flask___ on the ___Heroku___ platform.
|
5 |
+
|
6 |
+
• If you want to view the deployed model, click on the following link:<br />
|
7 |
+
Deployed at: https://spam-message-predictor21.herokuapp.com/
|
8 |
+
|
9 |
+
|
10 |
+
• Please do ⭐ the repository if it helped you in any way.
|
Spam SMS Classifier - Deployment.py
ADDED
@@ -0,0 +1,66 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Training script: cleans the SMS dataset, fits a bag-of-words vectorizer and a
# Multinomial Naive Bayes classifier, and pickles both fitted objects
# ('cv-transform.pkl' and 'spam-sms-mnb-model.pkl').

# Importing essential libraries
import pickle
import re

import nltk
import pandas as pd
from nltk.corpus import stopwords
from nltk.stem.porter import PorterStemmer
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB

# Loading the dataset (tab-separated, no header row: label, then message text)
df = pd.read_csv('Spam SMS Collection', sep='\t', names=['label', 'message'])

# The stop-word corpus must be present locally before stopwords.words() is called
nltk.download('stopwords')

# Cleaning the messages
corpus = []
ps = PorterStemmer()

# Loop invariants, built once: the original rebuilt the stop-word set for
# every single word of every message.
stop_words = set(stopwords.words('english'))
non_letters = re.compile(r'[^a-zA-Z]')

for raw_message in df['message']:

    # Cleaning special characters from the message
    message = non_letters.sub(' ', raw_message)

    # Converting the entire message into lower case
    message = message.lower()

    # Tokenizing the message by words
    words = message.split()

    # Removing the stop words
    words = [word for word in words if word not in stop_words]

    # Stemming the words
    words = [ps.stem(word) for word in words]

    # Joining the stemmed words
    message = ' '.join(words)

    # Building a corpus of messages
    corpus.append(message)

# Creating the Bag of Words model
cv = CountVectorizer(max_features=2500)
X = cv.fit_transform(corpus).toarray()

# Extracting dependent variable from the dataset.
# Column 1 is the second one-hot label column — presumably 'spam'; verify
# against the labels actually present in the dataset.
y = pd.get_dummies(df['label'])
y = y.iloc[:, 1].values

# Creating a pickle file for the CountVectorizer.
# 'with' guarantees the file is flushed and closed before the script moves on.
with open('cv-transform.pkl', 'wb') as vectorizer_file:
    pickle.dump(cv, vectorizer_file)

# Model Building
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=0)

# Fitting Naive Bayes to the Training set
classifier = MultinomialNB(alpha=0.3)
classifier.fit(X_train, y_train)

# Creating a pickle file for the Multinomial Naive Bayes model
filename = 'spam-sms-mnb-model.pkl'
with open(filename, 'wb') as model_file:
    pickle.dump(classifier, model_file)
|
Spam SMS Collection
ADDED
The diff for this file is too large to render.
See raw diff
|
|
app.py
ADDED
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Importing essential libraries
|
2 |
+
from flask import Flask, render_template, request
|
3 |
+
import pickle
|
4 |
+
|
5 |
+
# Load the Multinomial Naive Bayes model and CountVectorizer object from disk.
# Each file is opened in a 'with' block so its handle is closed as soon as the
# object has been unpickled (the original left both handles open).
# NOTE: pickle executes arbitrary code on load — only ship pickles produced by
# this project's own training script.
filename = 'spam-sms-mnb-model.pkl'
with open(filename, 'rb') as model_file:
    classifier = pickle.load(model_file)
with open('cv-transform.pkl', 'rb') as vectorizer_file:
    cv = pickle.load(vectorizer_file)
app = Flask(__name__)
|
10 |
+
|
11 |
+
@app.route('/')
def home():
    """Serve the landing page that holds the message-entry form."""
    landing_page = render_template('home.html')
    return landing_page
|
14 |
+
|
15 |
+
@app.route('/predict',methods=['POST'])
def predict():
    """Classify the submitted message and render the result page.

    Reads the ``message`` form field, vectorizes it with the loaded
    CountVectorizer, and asks the loaded classifier for a label. The label is
    passed to ``result.html`` as a plain ``int`` so the template's
    ``prediction==1`` / ``prediction==0`` checks compare scalars rather than
    relying on the truthiness of a one-element NumPy array.

    The route only accepts POST, so no method check is needed here; a request
    without a ``message`` field is rejected by ``request.form[...]`` itself.
    """
    message = request.form['message']

    # NOTE(review): the training script strips non-letters, removes stop words
    # and stems each message before fitting the vectorizer, whereas the raw
    # text is vectorized here as-is — confirm whether the same cleaning should
    # be applied at prediction time.
    vect = cv.transform([message]).toarray()

    # predict() returns one label per input row; exactly one row was supplied.
    my_prediction = classifier.predict(vect)
    return render_template('result.html', prediction=int(my_prediction[0]))
|
23 |
+
|
24 |
+
if __name__ == '__main__':
    # Do not hard-code debug=True: the interactive debugger lets anyone who
    # can reach the server execute arbitrary Python. With no explicit flag,
    # app.run() takes the debug setting from the FLASK_DEBUG environment
    # variable, so set FLASK_DEBUG=1 locally when the debugger is wanted.
    app.run()
|
cv-transform.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6850efa9440ff2b4cc6346f16adb34412101d83be2129c9ea7ce159f0487341e
|
3 |
+
size 179663
|
requirements.txt
ADDED
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
Flask==1.1.1
|
2 |
+
gunicorn==19.9.0
|
3 |
+
itsdangerous==1.1.0
|
4 |
+
Jinja2==2.10.1
|
5 |
+
MarkupSafe==1.1.1
|
6 |
+
Werkzeug==0.15.5
|
7 |
+
numpy>=1.9.2
|
8 |
+
scipy>=0.15.1
|
9 |
+
scikit-learn>=0.18
|
10 |
+
matplotlib>=1.4.3
|
11 |
+
pandas>=0.19
|
spam-sms-mnb-model.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e6cc509111cefe1d973976508cb26039511b2d8a4b7d7832a407cd3333dfe342
|
3 |
+
size 80636
|
static/not-spam.webp
ADDED
![]() |
static/spam-favicon.ico
ADDED
|
static/spam.webp
ADDED
![]() |
Git LFS Details
|
static/styles.css
ADDED
@@ -0,0 +1,126 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
/* Page skeleton: full-height column so the footer sits below the content */
html,
body {
  height: 100%;
  margin: 0;
}

body {
  font-family: Arial, Helvetica, sans-serif;
  text-align: center;
  padding: 0;
  width: 100%;
  display: flex;
  flex-direction: column;
}

/* Animated gradient shared by the title banner and the footer */
.container,
.footer {
  position: relative;
  background: linear-gradient(45deg, #ffffff, #ffffff, #f9f9f9, #eeeeee, #e0e4e1, #d7e1ec);
  background-size: 500% 500%;
  animation: change-gradient 10s ease-in-out infinite;
}

@keyframes change-gradient {
  0%,
  100% {
    background-position: 0 50%;
  }
  50% {
    background-position: 100% 50%;
  }
}

/* Website Title */
.container {
  padding: 30px;
}

.container-heading {
  margin: 0;
}

.container span {
  color: #ff0000;
}

.description p {
  font-style: italic;
  font-size: 14px;
  margin: 3px 0 0;
}

/* Text Area */
.ml-container {
  margin: 30px 0;
  flex: 1 0 auto;
}

.message-box {
  margin-bottom: 20px;
}

/* Predict Button */
.my-cta-button {
  display: inline-block;
  padding: 10px 36px;
  background: #f9f9f9;
  color: #000000;
  border: 2px solid #000000;
  border-radius: 1000px;
  box-shadow: 3px 3px #8c8c8c;
  font: italic bold 20px/1 "Calibri", sans-serif;
  text-align: center;
}

.my-cta-button:hover {
  color: #ff0000;
  border: 2px solid #ff0000;
}

/* Drop the shadow while the button is pressed */
.my-cta-button:active {
  box-shadow: 0 0;
}

/* Footer */
.footer {
  font-size: 14px;
  padding: 20px;
  flex-shrink: 0;
}

.contact-icon {
  color: #000000;
  padding: 7px;
}

.contact-icon:hover {
  color: #8c8c8c;
}

.footer-description {
  margin: 0;
  font-size: 12px;
}

/* Result */
.results {
  padding: 30px 0 0;
  flex: 1 0 auto;
}

.danger {
  color: #ff0000;
}

.safe {
  color: green;
}

.gif {
  width: 30%;
}
|
templates/home.html
ADDED
@@ -0,0 +1,40 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
<!DOCTYPE html>

<html lang="en" dir="ltr">
  <head>
    <meta charset="utf-8">
    <title>Spam message predictor</title>
    <link rel="shortcut icon" href="{{ url_for('static', filename='spam-favicon.ico') }}">
    <link rel="stylesheet" type="text/css" href="{{ url_for('static', filename='styles.css') }}">
    <script src="https://kit.fontawesome.com/5f3f547070.js" crossorigin="anonymous"></script>
  </head>

  <body>

    <!-- Website Title -->
    <div class="container">
      <h2 class='container-heading'><span>Spam Detector</span> for Short Message Service (SMS)</h2>
      <div class='description'>
        <p>A Machine Learning Web App, Built with Flask, Deployed using Heroku.</p>
      </div>
    </div>

    <!-- Text Area: posts the message to the predict view; "required" stops empty submissions -->
    <div class="ml-container">
      <form action="{{ url_for('predict') }}" method="POST">
        <textarea class='message-box' name="message" rows="15" cols="75" placeholder="Enter Your Message Here..." required></textarea><br/>
        <input type="submit" class="my-cta-button" value="Predict">
      </form>
    </div>

    <!-- Footer: rel="noopener noreferrer" keeps pages opened in a new tab from reaching window.opener -->
    <div class='footer'>
      <div class="contact">
        <a target="_blank" rel="noopener noreferrer" href="https://github.com/ArchitSharma21/Spam-message-predictor"><i class="fab fa-github fa-lg contact-icon"></i></a>
        <a target="_blank" rel="noopener noreferrer" href="https://www.linkedin.com/in/thearchitsharma/"><i class="fab fa-linkedin fa-lg contact-icon"></i></a>
      </div>
      <p class='footer-description'>Made with ❤️ by Archit Sharma.</p>
    </div>

  </body>
</html>
|
templates/result.html
ADDED
@@ -0,0 +1,43 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
<!DOCTYPE html>

<html lang="en" dir="ltr">
  <head>
    <meta charset="utf-8">
    <title>Spam message predictor</title>
    <link rel="shortcut icon" href="{{ url_for('static', filename='spam-favicon.ico') }}">
    <link rel="stylesheet" type="text/css" href="{{ url_for('static', filename='styles.css') }}">
    <script src="https://kit.fontawesome.com/5f3f547070.js" crossorigin="anonymous"></script>
  </head>

  <body>

    <!-- Website Title -->
    <div class="container">
      <h2 class='container-heading'><span>Spam Detector</span> for Short Message Service (SMS)</h2>
      <div class='description'>
        <p>A Machine Learning Web App, Built with Flask, Deployed using Heroku.</p>
      </div>
    </div>

    <!-- Result: "prediction" is supplied by the predict view (1 selects the spam branch, 0 the not-spam branch) -->
    <div class="results">
      {% if prediction==1 %}
        <h1>Prediction: <span class='danger'>Gotcha! This is a SPAM message.</span></h1>
        <img class="gif" src="{{ url_for('static', filename='spam.webp') }}" alt="SPAM Image">
      {% elif prediction==0 %}
        <h1>Prediction: <span class='safe'>Great! This is NOT a spam message.</span></h1>
        <img class="gif" src="{{ url_for('static', filename='not-spam.webp') }}" alt="Not a spam image">
      {% endif %}
    </div>

    <!-- Footer: rel="noopener noreferrer" keeps pages opened in a new tab from reaching window.opener -->
    <div class='footer'>
      <div class="contact">
        <a target="_blank" rel="noopener noreferrer" href="https://github.com/ArchitSharma21/Spam-message-predictor"><i class="fab fa-github fa-lg contact-icon"></i></a>
        <a target="_blank" rel="noopener noreferrer" href="https://www.linkedin.com/in/thearchitsharma/"><i class="fab fa-linkedin fa-lg contact-icon"></i></a>
      </div>
      <p class='footer-description'>Made with ❤️ by Archit Sharma.</p>
    </div>

  </body>
</html>
|