ArchitSharma commited on
Commit
e8afe32
·
1 Parent(s): d9f8f61

Upload 14 files

Browse files
.gitattributes CHANGED
@@ -32,3 +32,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
32
  *.zip filter=lfs diff=lfs merge=lfs -text
33
  *.zst filter=lfs diff=lfs merge=lfs -text
34
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
32
  *.zip filter=lfs diff=lfs merge=lfs -text
33
  *.zst filter=lfs diff=lfs merge=lfs -text
34
  *tfevents* filter=lfs diff=lfs merge=lfs -text
35
+ static/spam.webp filter=lfs diff=lfs merge=lfs -text
Procfile ADDED
@@ -0,0 +1 @@
 
 
1
+ web: gunicorn app:app
README.md CHANGED
@@ -1,10 +1,10 @@
1
- ---
2
- title: Spam Message Predictor
3
- emoji: 🐢
4
- colorFrom: purple
5
- colorTo: blue
6
- sdk: docker
7
- pinned: false
8
- ---
9
-
10
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
+ # Spam SMS Classification - Deployment
2
+ ![Kaggle](https://img.shields.io/badge/Dataset-Kaggle-blue.svg) ![Python 3.6](https://img.shields.io/badge/Python-3.6-brightgreen.svg) ![NLTK](https://img.shields.io/badge/Library-NLTK-orange.svg)
3
+
4
+ This repository contains the files required to deploy a ___Machine Learning Web App___ built with ___Flask___ on the ___Heroku___ platform.
5
+
6
+ If you want to view the deployed model, click on the following link:<br />
7
+ Deployed at: https://spam-message-predictor21.herokuapp.com/
8
+
9
+
10
+ Please ⭐ the repository if it helped you in any way.
Spam SMS Classifier - Deployment.py ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""Train the spam-SMS classifier and persist the fitted artifacts.

Reads the tab-separated 'Spam SMS Collection' file, cleans every message,
fits a bag-of-words CountVectorizer and a Multinomial Naive Bayes model, and
writes both to disk as 'cv-transform.pkl' and 'spam-sms-mnb-model.pkl'.
"""
# Importing essential libraries
import pickle
import re

import nltk
import pandas as pd
from nltk.corpus import stopwords
from nltk.stem.porter import PorterStemmer
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB

# The stop-word list has to be available locally before it can be read.
nltk.download('stopwords')

# Loading the dataset
df = pd.read_csv('Spam SMS Collection', sep='\t', names=['label', 'message'])

# Built once up front: the stop-word set and the regex are loop invariants.
# (The previous version rebuilt the set for every single word.)
non_letters = re.compile('[^a-zA-Z]')
stop_words = set(stopwords.words('english'))
ps = PorterStemmer()


def _clean_message(text):
    """Return *text* lower-cased, stripped of non-letters and stop words, and stemmed."""
    # Replace every non-letter with a space, then lower-case and tokenize on whitespace
    tokens = non_letters.sub(' ', text).lower().split()
    # Drop the stop words and stem what is left
    return ' '.join(ps.stem(token) for token in tokens if token not in stop_words)


# Cleaning the messages and building a corpus of them
corpus = [_clean_message(message) for message in df['message']]

# Creating the Bag of Words model
cv = CountVectorizer(max_features=2500)
X = cv.fit_transform(corpus).toarray()

# Extracting dependent variable from the dataset.
# get_dummies orders its columns by label, so column 1 is the second label;
# presumably 'spam' (labels 'ham'/'spam') - the dataset is not visible here.
# The cast keeps the target 0/1 integers regardless of the dtype that the
# installed pandas version gives to dummy columns.
y = pd.get_dummies(df['label']).iloc[:, 1].astype(int).values

# Creating a pickle file for the CountVectorizer
with open('cv-transform.pkl', 'wb') as cv_file:
    pickle.dump(cv, cv_file)

# Model Building
# The hold-out split is kept so the model is trained on the same 80% subset;
# the test portion itself is not used further in this script.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=0)

# Fitting Naive Bayes to the Training set
classifier = MultinomialNB(alpha=0.3)
classifier.fit(X_train, y_train)

# Creating a pickle file for the Multinomial Naive Bayes model
filename = 'spam-sms-mnb-model.pkl'
with open(filename, 'wb') as model_file:
    pickle.dump(classifier, model_file)
Spam SMS Collection ADDED
The diff for this file is too large to render. See raw diff
 
app.py ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""Flask web app that serves the spam-SMS classifier.

Loads the pickled Multinomial Naive Bayes model and CountVectorizer at import
time and exposes two routes: '/' (input form) and '/predict' (result page).
"""
# Importing essential libraries
import os
import pickle

from flask import Flask, render_template, request

# Load the Multinomial Naive Bayes model and CountVectorizer object from disk.
# NOTE: unpickling executes code embedded in the file, so these artifacts must
# only ever come from a trusted source.
filename = 'spam-sms-mnb-model.pkl'
with open(filename, 'rb') as model_file:
    classifier = pickle.load(model_file)
with open('cv-transform.pkl', 'rb') as cv_file:
    cv = pickle.load(cv_file)

app = Flask(__name__)


@app.route('/')
def home():
    """Render the page with the message input form."""
    return render_template('home.html')


@app.route('/predict', methods=['POST'])
def predict():
    """Classify the submitted message and render the result page.

    The route only accepts POST, so no method check is needed. The template
    receives the predicted label as a plain int (it compares it against 1
    and 0) instead of a one-element NumPy array.
    """
    # A request without a 'message' field is rejected by Flask with HTTP 400.
    message = request.form['message']
    vect = cv.transform([message]).toarray()
    my_prediction = int(classifier.predict(vect)[0])
    return render_template('result.html', prediction=my_prediction)


if __name__ == '__main__':
    # The interactive debugger allows arbitrary code execution, so it is
    # opt-in: set FLASK_DEBUG=1 for local development only.
    app.run(debug=os.environ.get('FLASK_DEBUG') == '1')
cv-transform.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6850efa9440ff2b4cc6346f16adb34412101d83be2129c9ea7ce159f0487341e
3
+ size 179663
requirements.txt ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Flask==1.1.1
2
+ gunicorn==19.9.0
3
+ itsdangerous==1.1.0
4
+ Jinja2==2.10.1
5
+ MarkupSafe==1.1.1
6
+ Werkzeug==0.15.5
7
+ numpy>=1.9.2
8
+ scipy>=0.15.1
9
+ scikit-learn>=0.18
10
+ matplotlib>=1.4.3
11
+ pandas>=0.19
spam-sms-mnb-model.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e6cc509111cefe1d973976508cb26039511b2d8a4b7d7832a407cd3333dfe342
3
+ size 80636
static/not-spam.webp ADDED
static/spam-favicon.ico ADDED
static/spam.webp ADDED

Git LFS Details

  • SHA256: 2b5a49afeb256f1cc397f1c43857a678fc41fdaeb7a579baa1e20419239b0017
  • Pointer size: 132 Bytes
  • Size of remote file: 1.93 MB
static/styles.css ADDED
@@ -0,0 +1,126 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ html{
2
+ height: 100%;
3
+ margin: 0;
4
+ }
5
+
6
+ body{
7
+ font-family: Arial, Helvetica,sans-serif;
8
+ text-align: center;
9
+ margin: 0;
10
+ padding: 0;
11
+ width: 100%;
12
+ height: 100%;
13
+ display: flex;
14
+ flex-direction: column;
15
+ }
16
+
17
+ /* Website Title */
18
+ .container{
19
+ padding: 30px;
20
+ position: relative;
21
+ background: linear-gradient(45deg, #ffffff, #ffffff, #f9f9f9, #eeeeee, #e0e4e1, #d7e1ec);
22
+ background-size: 500% 500%;
23
+ animation: change-gradient 10s ease-in-out infinite;
24
+ }
25
+ @keyframes change-gradient {
26
+ 0%{
27
+ background-position: 0 50%;
28
+ }
29
+ 50%{
30
+ background-position: 100% 50%;
31
+ }
32
+ 100%{
33
+ background-position: 0 50%;
34
+ }
35
+ }
36
+
37
+ .container-heading{
38
+ margin: 0;
39
+ }
40
+
41
+ .container span{
42
+ color: #ff0000;
43
+ }
44
+
45
+ .description p{
46
+ font-style: italic;
47
+ font-size: 14px;
48
+ margin: 3px 0 0;
49
+ }
50
+
51
+ /* Text Area */
52
+ .ml-container{
53
+ margin: 30px 0;
54
+ flex: 1 0 auto;
55
+ }
56
+
57
+ .message-box{
58
+ margin-bottom: 20px;
59
+ }
60
+
61
+ /* Predict Button */
62
+ .my-cta-button{
63
+ background: #f9f9f9;
64
+ border: 2px solid #000000;
65
+ border-radius: 1000px;
66
+ box-shadow: 3px 3px #8c8c8c;
67
+ padding: 10px 36px;
68
+ color: #000000;
69
+ display: inline-block;
70
+ font: italic bold 20px/1 "Calibri", sans-serif;
71
+ text-align: center;
72
+ }
73
+
74
+ .my-cta-button:hover{
75
+ color: #ff0000;
76
+ border: 2px solid #ff0000;
77
+ }
78
+
79
+ .my-cta-button:active{
80
+ box-shadow: 0 0;
81
+ }
82
+
83
+
84
+ /* Footer */
85
+ .footer{
86
+ font-size: 14px;
87
+ padding: 20px;
88
+ flex-shrink: 0;
89
+ position: relative;
90
+ background: linear-gradient(45deg, #ffffff, #ffffff, #f9f9f9, #eeeeee, #e0e4e1, #d7e1ec);
91
+ background-size: 500% 500%;
92
+ animation: change-gradient 10s ease-in-out infinite;
93
+ }
94
+
95
+ .contact-icon{
96
+ color: #000000;
97
+ padding: 7px;
98
+ }
99
+
100
+
101
+ .contact-icon:hover{
102
+ color: #8c8c8c;
103
+ }
104
+
105
+ .footer-description{
106
+ margin: 0;
107
+ font-size: 12px;
108
+ }
109
+
110
+ /* Result */
111
+ .results{
112
+ padding: 30px 0 0;
113
+ flex: 1 0 auto;
114
+ }
115
+
116
+ .danger{
117
+ color: #ff0000;
118
+ }
119
+
120
+ .safe{
121
+ color: green;
122
+ }
123
+
124
+ .gif{
125
+ width: 30%;
126
+ }
templates/home.html ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
<!DOCTYPE html>

<html lang="en" dir="ltr">
  <head>
    <meta charset="utf-8">
    <title>Spam message predictor</title>
    <link rel="shortcut icon" href="{{ url_for('static', filename='spam-favicon.ico') }}">
    <link rel="stylesheet" type="text/css" href="{{ url_for('static', filename='styles.css') }}">
    <script src="https://kit.fontawesome.com/5f3f547070.js" crossorigin="anonymous"></script>
  </head>

  <body>

    <!-- Website Title -->
    <div class="container">
      <h2 class='container-heading'><span>Spam Detector</span> for Short Message Service (SMS)</h2>
      <div class='description'>
        <p>A Machine Learning Web App, Built with Flask, Deployed using Heroku.</p>
      </div>
    </div>

    <!-- Text Area: posts the message to the 'predict' route -->
    <div class="ml-container">
      <form action="{{ url_for('predict') }}" method="POST">
        <textarea class='message-box' name="message" rows="15" cols="75" placeholder="Enter Your Message Here..."></textarea><br/>
        <input type="submit" class="my-cta-button" value="Predict">
      </form>
    </div>

    <!-- Footer -->
    <!-- rel="noopener noreferrer" stops the opened page from reaching back via window.opener;
         aria-label gives the icon-only links an accessible name. -->
    <div class='footer'>
      <div class="contact">
        <a target="_blank" rel="noopener noreferrer" aria-label="GitHub repository" href="https://github.com/ArchitSharma21/Spam-message-predictor"><i class="fab fa-github fa-lg contact-icon"></i></a>
        <a target="_blank" rel="noopener noreferrer" aria-label="LinkedIn profile" href="https://www.linkedin.com/in/thearchitsharma/"><i class="fab fa-linkedin fa-lg contact-icon"></i></a>
      </div>
      <p class='footer-description'>Made with ❤️ by Archit Sharma.</p>
    </div>

  </body>
</html>
templates/result.html ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
<!DOCTYPE html>

<html lang="en" dir="ltr">
  <head>
    <meta charset="utf-8">
    <title>Spam message predictor</title>
    <link rel="shortcut icon" href="{{ url_for('static', filename='spam-favicon.ico') }}">
    <link rel="stylesheet" type="text/css" href="{{ url_for('static', filename='styles.css') }}">
    <script src="https://kit.fontawesome.com/5f3f547070.js" crossorigin="anonymous"></script>
  </head>

  <body>

    <!-- Website Title -->
    <div class="container">
      <h2 class='container-heading'><span>Spam Detector</span> for Short Message Service (SMS)</h2>
      <div class='description'>
        <p>A Machine Learning Web App, Built with Flask, Deployed using Heroku.</p>
      </div>
    </div>

    <!-- Result: 'prediction' is 1 for spam and 0 for not spam; any other value renders nothing -->
    <div class="results">
      {% if prediction==1 %}
      <h1>Prediction: <span class='danger'>Gotcha! This is a SPAM message.</span></h1>
      <img class="gif" src="{{ url_for('static', filename='spam.webp') }}" alt="SPAM Image">
      {% elif prediction==0 %}
      <h1>Prediction: <span class='safe'>Great! This is NOT a spam message.</span></h1>
      <img class="gif" src="{{ url_for('static', filename='not-spam.webp') }}" alt="Not a spam image">
      {% endif %}
    </div>

    <!-- Footer -->
    <!-- rel="noopener noreferrer" stops the opened page from reaching back via window.opener;
         aria-label gives the icon-only links an accessible name. -->
    <div class='footer'>
      <div class="contact">
        <a target="_blank" rel="noopener noreferrer" aria-label="GitHub repository" href="https://github.com/ArchitSharma21/Spam-message-predictor"><i class="fab fa-github fa-lg contact-icon"></i></a>
        <a target="_blank" rel="noopener noreferrer" aria-label="LinkedIn profile" href="https://www.linkedin.com/in/thearchitsharma/"><i class="fab fa-linkedin fa-lg contact-icon"></i></a>
      </div>
      <p class='footer-description'>Made with ❤️ by Archit Sharma.</p>
    </div>

  </body>
</html>