Commit
·
2880a2f
1
Parent(s):
ede32a1
Upload trained model
Browse filesThis repository contains code and resources for building a churn prediction model using machine learning techniques, and deploying it with Gradio for a user-friendly interface. Gradio is used to create a web interface for the trained model, which allows users to input customer data and get predictions on their likelihood of churning.
- ChurnProject3.py +153 -0
- Final_model.joblib +3 -0
- LP3 -Copy1.ipynb +565 -0
- README.md +77 -0
- Telco-Customer-Churn.csv +0 -0
- categorical_imputer.joblib +3 -0
- encoder.joblib +3 -0
- numerical_imputer.joblib +3 -0
- requirements.txt +5 -0
- scaler.joblib +3 -0
ChurnProject3.py
ADDED
@@ -0,0 +1,153 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
import pickle
import pandas as pd
import numpy as np
import joblib
from PIL import Image


# Load the preprocessing artefacts and the trained classifier that were
# exported from the training notebook with joblib.dump().  All files are
# expected to sit next to this script.
num_imputer = joblib.load('numerical_imputer.joblib')    # SimpleImputer(strategy="mean") for numeric columns
cat_imputer = joblib.load('categorical_imputer.joblib')  # SimpleImputer(strategy="most_frequent") for categorical columns
encoder = joblib.load('encoder.joblib')                  # fitted OneHotEncoder(handle_unknown='ignore')
scaler = joblib.load('scaler.joblib')                    # fitted StandardScaler
model = joblib.load('Final_model.joblib')                # trained churn classifier (Random Forest per the notebook)
|
14 |
+
|
15 |
+
|
16 |
+
# Create a function that applies the ML pipeline and makes predictions
def predict(gender,SeniorCitizen,Partner,Dependents, tenure, PhoneService,MultipleLines,
            InternetService,OnlineSecurity,OnlineBackup,DeviceProtection,TechSupport,StreamingTV,StreamingMovies,
            Contract,PaperlessBilling,PaymentMethod,MonthlyCharges,TotalCharges):
    """Run one customer's attributes through the saved preprocessing pipeline
    and the trained model, and return a human-readable churn verdict.

    Parameters mirror the columns of the Telco churn dataset.  Categorical
    values must use the exact spellings the encoder was fitted on (e.g.
    'Female'/'Male', 'Month-to-month'); unknown spellings are silently
    ignored by the one-hot encoder and degrade the prediction.

    Returns:
        str: a message stating whether the customer is likely to churn.
    """
    # Collect the inputs into a single-row dataframe matching the training schema.
    input_df = pd.DataFrame({
        'gender': [gender],
        'SeniorCitizen': [SeniorCitizen],
        'Partner': [Partner],
        'Dependents': [Dependents],
        'tenure': [tenure],
        'PhoneService': [PhoneService],
        'MultipleLines': [MultipleLines],
        'InternetService': [InternetService],
        'OnlineSecurity': [OnlineSecurity],
        'OnlineBackup': [OnlineBackup],
        'DeviceProtection': [DeviceProtection],
        'TechSupport': [TechSupport],
        'StreamingTV': [StreamingTV],
        'StreamingMovies': [StreamingMovies],
        'Contract': [Contract],
        'PaperlessBilling': [PaperlessBilling],
        'PaymentMethod': [PaymentMethod],
        'MonthlyCharges': [MonthlyCharges],
        'TotalCharges': [TotalCharges]
    })

    # Split the columns by dtype so each group goes through its matching imputer.
    cat_columns = [col for col in input_df.columns if input_df[col].dtype == 'object']
    num_columns = [col for col in input_df.columns if input_df[col].dtype != 'object']

    # Apply the imputers fitted at training time (transform only, never fit here).
    input_df_imputed_cat = cat_imputer.transform(input_df[cat_columns])
    input_df_imputed_num = num_imputer.transform(input_df[num_columns])

    # One-hot encode the categorical columns; get_feature_names_out yields the
    # full set of training-time category columns regardless of this single row.
    input_encoded_df = pd.DataFrame(encoder.transform(input_df_imputed_cat).toarray(),
                                    columns=encoder.get_feature_names_out(cat_columns))

    # Scale the numerical columns with the training-time scaler.
    input_df_scaled = scaler.transform(input_df_imputed_num)
    input_scaled_df = pd.DataFrame(input_df_scaled, columns=num_columns)

    # Join the encoded categoricals with the scaled numericals, then reorder
    # the columns into the exact order the model was trained on.
    final_df = pd.concat([input_encoded_df, input_scaled_df], axis=1)
    final_df = final_df.reindex(columns=['SeniorCitizen','tenure','MonthlyCharges','TotalCharges',
        'gender_Female','gender_Male','Partner_No','Partner_Yes','Dependents_No','Dependents_Yes','PhoneService_No',
        'PhoneService_Yes','MultipleLines_No','MultipleLines_Yes','InternetService_DSL','InternetService_Fiber optic',
        'InternetService_No','OnlineSecurity_No','OnlineSecurity_Yes','OnlineBackup_No','OnlineBackup_Yes','DeviceProtection_No',
        'DeviceProtection_Yes','TechSupport_No','TechSupport_Yes','StreamingTV_No','StreamingTV_Yes','StreamingMovies_No',
        'StreamingMovies_Yes','Contract_Month-to-month','Contract_One year','Contract_Two year','PaperlessBilling_No',
        'PaperlessBilling_Yes','PaymentMethod_Bank transfer (automatic)','PaymentMethod_Credit card (automatic)','PaymentMethod_Electronic check',
        'PaymentMethod_Mailed check'])

    # The model emits the original string labels ('Yes'/'No' per the notebook),
    # so compare against 'Yes' and translate into a user-facing message.
    predictions = model.predict(final_df)
    prediction_label = ("Beware!!! This customer is likely to churn"
                        if predictions.item() == "Yes"
                        else "This customer is not likely to churn")

    return prediction_label
|
91 |
+
|
92 |
+
# ----------------------- Gradio interface -----------------------
input_interface = []
with gr.Blocks(css=".gradio-container {background-color: powderblue}") as app:
    # Use a path relative to the app directory so the banner also loads when
    # the app is deployed; the original absolute Windows path only existed on
    # the author's machine and crashed everywhere else.
    img = gr.Image("telecom churn.png")

    Title = gr.Label('CUSTOMER CHURN PREDICTION APP')

    with gr.Row():
        Title
    with gr.Row():
        img

    with gr.Row():
        gr.Markdown("This app predicts whether a customer will leave your company or not. Enter the details of the customer below to see the result")

    with gr.Row():
        with gr.Column(scale=3, min_width=600):
            # NOTE: choices must match the category spellings the encoder was
            # fitted on (the notebook shows 'Female'/'Male'); with
            # OneHotEncoder(handle_unknown='ignore') an unknown spelling such
            # as 'male' silently zeroes both gender feature columns.
            input_interface = [
                gr.components.Radio(['Female', 'Male'], label='Select your gender'),
                gr.components.Number(label="Are you a Seniorcitizen; No=0 and Yes=1"),
                gr.components.Radio(['Yes', 'No'], label='Do you have Partner'),
                gr.components.Dropdown(['No', 'Yes'], label='Do you have any Dependents? '),
                gr.components.Number(label='Length of tenure (no. of months with Telco)'),
                gr.components.Radio(['No', 'Yes'], label='Do you have PhoneService? '),
                gr.components.Radio(['No', 'Yes'], label='Do you have MultipleLines'),
                gr.components.Radio(['DSL', 'Fiber optic', 'No'], label='Do you have InternetService'),
                gr.components.Radio(['No', 'Yes'], label='Do you have OnlineSecurity?'),
                gr.components.Radio(['No', 'Yes'], label='Do you have OnlineBackup?'),
                gr.components.Radio(['No', 'Yes'], label='Do you have DeviceProtection?'),
                gr.components.Radio(['No', 'Yes'], label='Do you have TechSupport?'),
                gr.components.Radio(['No', 'Yes'], label='Do you have StreamingTV?'),
                gr.components.Radio(['No', 'Yes'], label='Do you have StreamingMovies?'),
                gr.components.Dropdown(['Month-to-month', 'One year', 'Two year'], label='which Contract do you use?'),
                gr.components.Radio(['Yes', 'No'], label='Do you prefer PaperlessBilling?'),
                gr.components.Dropdown(['Electronic check', 'Mailed check', 'Bank transfer (automatic)',
                                        'Credit card (automatic)'], label='Which PaymentMethod do you prefer?'),
                gr.components.Number(label="Enter monthly charges"),
                gr.components.Number(label="Enter total charges")
            ]

    with gr.Row():
        # Single action button; the original extra 'Submit' button had no
        # callback attached and was removed as dead UI.
        predict_btn = gr.Button('Predict')

    # Output display for the churn verdict returned by predict().
    output_interface = gr.Label(label="churn")

    predict_btn.click(fn=predict, inputs=input_interface, outputs=output_interface)


app.launch(share=True)
|
Final_model.joblib
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7a9fe6f72f1eca45ce476563b8dccc2968d26185d768095da7f2767d180be7cb
|
3 |
+
size 20651256
|
LP3 -Copy1.ipynb
ADDED
@@ -0,0 +1,565 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"cells": [
|
3 |
+
{
|
4 |
+
"cell_type": "code",
|
5 |
+
"execution_count": 1,
|
6 |
+
"id": "db772bcc",
|
7 |
+
"metadata": {},
|
8 |
+
"outputs": [],
|
9 |
+
"source": [
|
10 |
+
"# Data handling\n",
|
11 |
+
"import pandas as pd\n",
|
12 |
+
"import numpy as np \n",
|
13 |
+
"\n",
|
14 |
+
"\n",
|
15 |
+
"# EDA (pandas-profiling, etc. )\n",
|
16 |
+
"...\n",
|
17 |
+
"\n",
|
18 |
+
"# Feature Processing (Scikit-learn processing, etc. )\n",
|
19 |
+
"from sklearn import preprocessing\n",
|
20 |
+
"\n",
|
21 |
+
"# Machine Learning (Scikit-learn Estimators, Catboost, LightGBM, etc. )\n",
|
22 |
+
"...\n",
|
23 |
+
"\n",
|
24 |
+
"# Hyperparameters Fine-tuning (Scikit-learn hp search, cross-validation, etc. )\n",
|
25 |
+
"...\n",
|
26 |
+
"\n",
|
27 |
+
"# Other packages\n",
|
28 |
+
"import os\n",
|
29 |
+
"import warnings\n",
|
30 |
+
"warnings.filterwarnings('ignore')\n",
|
31 |
+
"\n",
|
32 |
+
"#display all columns and rows \n",
|
33 |
+
"pd.set_option('display.max_columns', None)\n"
|
34 |
+
]
|
35 |
+
},
|
36 |
+
{
|
37 |
+
"cell_type": "code",
|
38 |
+
"execution_count": 2,
|
39 |
+
"id": "d80b4220",
|
40 |
+
"metadata": {},
|
41 |
+
"outputs": [
|
42 |
+
{
|
43 |
+
"name": "stdout",
|
44 |
+
"output_type": "stream",
|
45 |
+
"text": [
|
46 |
+
"Class counts before SMOTE: No 4111\n",
|
47 |
+
"Yes 1505\n",
|
48 |
+
"Name: Churn, dtype: int64\n",
|
49 |
+
"Class counts after SMOTE: Yes 4111\n",
|
50 |
+
"No 4111\n",
|
51 |
+
"Name: Churn, dtype: int64\n",
|
52 |
+
"AdaBoost Classifier: 0.9019360028118717\n",
|
53 |
+
"Logistic Regression Classifier: 0.8608679697080713\n",
|
54 |
+
"Random Forest Classifier: 0.9311295690912422\n",
|
55 |
+
"Gradient Boosting Classifier: 0.9235269779240596\n",
|
56 |
+
"SVM Classifier: 0.8944493562575639\n",
|
57 |
+
"Best model: Random Forest Classifier\n",
|
58 |
+
"AdaBoost Classifier classification report:\n",
|
59 |
+
" precision recall f1-score support\n",
|
60 |
+
"\n",
|
61 |
+
" No 0.90 0.76 0.82 1053\n",
|
62 |
+
" Yes 0.50 0.74 0.60 352\n",
|
63 |
+
"\n",
|
64 |
+
" accuracy 0.75 1405\n",
|
65 |
+
" macro avg 0.70 0.75 0.71 1405\n",
|
66 |
+
"weighted avg 0.80 0.75 0.77 1405\n",
|
67 |
+
"\n",
|
68 |
+
"\n",
|
69 |
+
"Logistic Regression Classifier classification report:\n",
|
70 |
+
" precision recall f1-score support\n",
|
71 |
+
"\n",
|
72 |
+
" No 0.92 0.73 0.81 1053\n",
|
73 |
+
" Yes 0.49 0.80 0.61 352\n",
|
74 |
+
"\n",
|
75 |
+
" accuracy 0.74 1405\n",
|
76 |
+
" macro avg 0.70 0.76 0.71 1405\n",
|
77 |
+
"weighted avg 0.81 0.74 0.76 1405\n",
|
78 |
+
"\n",
|
79 |
+
"\n",
|
80 |
+
"Random Forest Classifier classification report:\n",
|
81 |
+
" precision recall f1-score support\n",
|
82 |
+
"\n",
|
83 |
+
" No 0.86 0.84 0.85 1053\n",
|
84 |
+
" Yes 0.56 0.61 0.58 352\n",
|
85 |
+
"\n",
|
86 |
+
" accuracy 0.78 1405\n",
|
87 |
+
" macro avg 0.71 0.72 0.72 1405\n",
|
88 |
+
"weighted avg 0.79 0.78 0.79 1405\n",
|
89 |
+
"\n",
|
90 |
+
"\n",
|
91 |
+
"Gradient Boosting Classifier classification report:\n",
|
92 |
+
" precision recall f1-score support\n",
|
93 |
+
"\n",
|
94 |
+
" No 0.89 0.80 0.84 1053\n",
|
95 |
+
" Yes 0.54 0.69 0.60 352\n",
|
96 |
+
"\n",
|
97 |
+
" accuracy 0.77 1405\n",
|
98 |
+
" macro avg 0.71 0.74 0.72 1405\n",
|
99 |
+
"weighted avg 0.80 0.77 0.78 1405\n",
|
100 |
+
"\n",
|
101 |
+
"\n",
|
102 |
+
"SVM Classifier classification report:\n",
|
103 |
+
" precision recall f1-score support\n",
|
104 |
+
"\n",
|
105 |
+
" No 0.89 0.77 0.83 1053\n",
|
106 |
+
" Yes 0.52 0.73 0.60 352\n",
|
107 |
+
"\n",
|
108 |
+
" accuracy 0.76 1405\n",
|
109 |
+
" macro avg 0.71 0.75 0.72 1405\n",
|
110 |
+
"weighted avg 0.80 0.76 0.77 1405\n",
|
111 |
+
"\n",
|
112 |
+
"\n"
|
113 |
+
]
|
114 |
+
}
|
115 |
+
],
|
116 |
+
"source": [
|
117 |
+
"# For CSV, use pandas.read_csv\n",
|
118 |
+
"\n",
|
119 |
+
"df = pd.read_csv(\"Telco-Customer-Churn.csv\")\n",
|
120 |
+
"df.drop(['customerID'], axis=1, inplace=True)\n",
|
121 |
+
"# Coerce the conversion of TotalCharges column to float\n",
|
122 |
+
"df['TotalCharges'] = pd.to_numeric(df['TotalCharges'], errors='coerce')\n",
|
123 |
+
"# Remove the duplicate rows\n",
|
124 |
+
"df = df.drop_duplicates()\n",
|
125 |
+
"\n",
|
126 |
+
"cols_to_replace = ['OnlineSecurity', 'OnlineBackup', 'DeviceProtection', 'TechSupport', 'StreamingTV', 'StreamingMovies', 'MultipleLines']\n",
|
127 |
+
"df[cols_to_replace] = df[cols_to_replace].replace('No internet service', 'No').replace('No phone service', 'No')\n",
|
128 |
+
"\n",
|
129 |
+
"\n",
|
130 |
+
"from sklearn.model_selection import train_test_split\n",
|
131 |
+
"\n",
|
132 |
+
"# split the data into features (X) and target variable (y)\n",
|
133 |
+
"X = df.drop('Churn', axis=1)\n",
|
134 |
+
"y = df['Churn']\n",
|
135 |
+
"\n",
|
136 |
+
"# split the data into train and test sets\n",
|
137 |
+
"X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)\n",
|
138 |
+
"\n",
|
139 |
+
"# Identify numeric and non-numeric columns\n",
|
140 |
+
"num_cols = X.select_dtypes(include=[np.number]).columns.tolist()\n",
|
141 |
+
"cat_cols = X.select_dtypes(exclude=[np.number]).columns.tolist()\n",
|
142 |
+
"\n",
|
143 |
+
"\n",
|
144 |
+
"'''creating copy of the categorical features and numerical features\n",
|
145 |
+
"before imputing null values to avoid modifying the original dataset'''\n",
|
146 |
+
"\n",
|
147 |
+
"X_train_cat = X_train[cat_cols].copy()\n",
|
148 |
+
"X_train_num = X_train[num_cols].copy()\n",
|
149 |
+
"\n",
|
150 |
+
"X_test_cat = X_test[cat_cols].copy()\n",
|
151 |
+
"X_test_num = X_test[num_cols].copy()\n",
|
152 |
+
"\n",
|
153 |
+
"from sklearn.impute import SimpleImputer\n",
|
154 |
+
"from sklearn.pipeline import Pipeline\n",
|
155 |
+
"from sklearn.preprocessing import OneHotEncoder\n",
|
156 |
+
"from sklearn.compose import ColumnTransformer\n",
|
157 |
+
"\n",
|
158 |
+
"# Creating imputer variables\n",
|
159 |
+
"numerical_imputer = SimpleImputer(strategy = \"mean\")\n",
|
160 |
+
"categorical_imputer = SimpleImputer(strategy = \"most_frequent\")\n",
|
161 |
+
"\n",
|
162 |
+
"\n",
|
163 |
+
"# Define the column transformer\n",
|
164 |
+
"categorical_features = cat_cols\n",
|
165 |
+
"categorical_transformer = Pipeline(steps=[\n",
|
166 |
+
" ('onehot', OneHotEncoder(handle_unknown='ignore', categories='auto', sparse=False))\n",
|
167 |
+
"])\n",
|
168 |
+
"preprocessor = ColumnTransformer(\n",
|
169 |
+
" transformers=[\n",
|
170 |
+
" ('cat', categorical_transformer, categorical_features)\n",
|
171 |
+
" ])\n",
|
172 |
+
"\n",
|
173 |
+
"# Fitting the Imputer\n",
|
174 |
+
"X_train_cat_imputed = categorical_imputer.fit_transform(X_train_cat)\n",
|
175 |
+
"X_train_num_imputed = numerical_imputer.fit_transform(X_train_num)\n",
|
176 |
+
"\n",
|
177 |
+
"X_test_cat_imputed = categorical_imputer.fit_transform(X_test_cat)\n",
|
178 |
+
"X_test_num_imputed = numerical_imputer.fit_transform(X_test_num)\n",
|
179 |
+
"\n",
|
180 |
+
"encoder=OneHotEncoder(handle_unknown='ignore')\n",
|
181 |
+
"\n",
|
182 |
+
"# encoding the xtrain categories and converting to a dataframe\n",
|
183 |
+
"X_train_cat_encoded = encoder.fit(X_train_cat_imputed)\n",
|
184 |
+
"X_train_cat_encoded = pd.DataFrame(encoder.transform(X_train_cat_imputed).toarray(),\n",
|
185 |
+
" columns=encoder.get_feature_names_out(cat_cols))\n",
|
186 |
+
"\n",
|
187 |
+
"# encoding the xeval categories and converting to a dataframe\n",
|
188 |
+
"X_test_cat_encoded = encoder.fit(X_test_cat_imputed)\n",
|
189 |
+
"X_test_cat_encoded = pd.DataFrame(encoder.transform(X_test_cat_imputed).toarray(),\n",
|
190 |
+
" columns=encoder.get_feature_names_out(cat_cols))\n",
|
191 |
+
"\n",
|
192 |
+
"\n",
|
193 |
+
"from sklearn.preprocessing import StandardScaler\n",
|
194 |
+
"\n",
|
195 |
+
"scaler= StandardScaler()\n",
|
196 |
+
"\n",
|
197 |
+
"X_train_num_scaled = scaler.fit_transform(X_train_num_imputed)\n",
|
198 |
+
"X_train_num_sc = pd.DataFrame(X_train_num_scaled, columns = num_cols)\n",
|
199 |
+
"\n",
|
200 |
+
"X_test_num_scaled = scaler.fit_transform(X_test_num_imputed)\n",
|
201 |
+
"X_test_num_sc = pd.DataFrame(X_test_num_scaled, columns = num_cols)\n",
|
202 |
+
"\n",
|
203 |
+
"X_train_df = pd.concat([X_train_num_sc,X_train_cat_encoded], axis =1)\n",
|
204 |
+
"X_test_df = pd.concat([X_test_num_sc,X_test_cat_encoded], axis =1)\n",
|
205 |
+
"\n",
|
206 |
+
"\n",
|
207 |
+
"#Training over SMOTE-balanced data with roc_auc scoring \n",
|
208 |
+
"\n",
|
209 |
+
"\n",
|
210 |
+
"from sklearn.ensemble import AdaBoostClassifier, GradientBoostingClassifier, RandomForestClassifier\n",
|
211 |
+
"from sklearn.linear_model import LogisticRegression\n",
|
212 |
+
"from sklearn.svm import SVC\n",
|
213 |
+
"from sklearn.model_selection import cross_val_score\n",
|
214 |
+
"from sklearn.metrics import roc_auc_score\n",
|
215 |
+
"from imblearn.over_sampling import SMOTE\n",
|
216 |
+
"\n",
|
217 |
+
"# initialize SMOTE\n",
|
218 |
+
"sm = SMOTE(random_state=42)\n",
|
219 |
+
"\n",
|
220 |
+
"# fit SMOTE on the training data and resample it\n",
|
221 |
+
"X_train_resampled, y_train_resampled = sm.fit_resample(X_train_df, y_train)\n",
|
222 |
+
"\n",
|
223 |
+
"# print class counts before and after SMOTE\n",
|
224 |
+
"print(f'Class counts before SMOTE: {y_train.value_counts()}')\n",
|
225 |
+
"print(f'Class counts after SMOTE: {y_train_resampled.value_counts()}')\n",
|
226 |
+
"\n",
|
227 |
+
"# create a dictionary of models to fit\n",
|
228 |
+
"models = {\n",
|
229 |
+
" 'AdaBoost Classifier': AdaBoostClassifier(),\n",
|
230 |
+
" 'Logistic Regression Classifier': LogisticRegression(),\n",
|
231 |
+
" 'Random Forest Classifier': RandomForestClassifier(),\n",
|
232 |
+
" 'Gradient Boosting Classifier': GradientBoostingClassifier(),\n",
|
233 |
+
" 'SVM Classifier': SVC(probability=True)\n",
|
234 |
+
"}\n",
|
235 |
+
"\n",
|
236 |
+
"# iterate over the models and fit each one to the resampled training data\n",
|
237 |
+
"for name, model in models.items():\n",
|
238 |
+
" model.fit(X_train_resampled, y_train_resampled)\n",
|
239 |
+
" \n",
|
240 |
+
"# evaluate each model using cross-validation based on ROC-AUC\n",
|
241 |
+
"roc_auc_scores = {}\n",
|
242 |
+
"for name, model in models.items():\n",
|
243 |
+
" scores = cross_val_score(model, X_train_resampled, y_train_resampled, cv=5, scoring='roc_auc')\n",
|
244 |
+
" roc_auc_scores[name] = scores.mean()\n",
|
245 |
+
" \n",
|
246 |
+
"# print the ROC-AUC scores for each model\n",
|
247 |
+
"for name, score in roc_auc_scores.items():\n",
|
248 |
+
" print(f'{name}: {score}')\n",
|
249 |
+
"\n",
|
250 |
+
"# choose the model with the highest ROC-AUC score\n",
|
251 |
+
"best_model_name = max(roc_auc_scores, key=roc_auc_scores.get)\n",
|
252 |
+
"best_model = models[best_model_name]\n",
|
253 |
+
"print(f'Best model: {best_model_name}')\n",
|
254 |
+
"\n",
|
255 |
+
"from sklearn.metrics import classification_report\n",
|
256 |
+
"\n",
|
257 |
+
"# iterate over the models and make predictions on the test data for each one\n",
|
258 |
+
"for name, model in models.items():\n",
|
259 |
+
" # fit the model to the resampled training data\n",
|
260 |
+
" model.fit(X_train_resampled, y_train_resampled)\n",
|
261 |
+
" # make predictions on the test data\n",
|
262 |
+
" y_pred = model.predict(X_test_df)\n",
|
263 |
+
" # generate the classification report\n",
|
264 |
+
" report = classification_report(y_test, y_pred)\n",
|
265 |
+
" # print the classification report\n",
|
266 |
+
" print(f'{name} classification report:\\n{report}\\n')\n"
|
267 |
+
]
|
268 |
+
},
|
269 |
+
{
|
270 |
+
"cell_type": "code",
|
271 |
+
"execution_count": 10,
|
272 |
+
"id": "4aab6799",
|
273 |
+
"metadata": {},
|
274 |
+
"outputs": [
|
275 |
+
{
|
276 |
+
"data": {
|
277 |
+
"text/plain": [
|
278 |
+
"['SeniorCitizen',\n",
|
279 |
+
" 'tenure',\n",
|
280 |
+
" 'MonthlyCharges',\n",
|
281 |
+
" 'TotalCharges',\n",
|
282 |
+
" 'gender_Female',\n",
|
283 |
+
" 'gender_Male',\n",
|
284 |
+
" 'Partner_No',\n",
|
285 |
+
" 'Partner_Yes',\n",
|
286 |
+
" 'Dependents_No',\n",
|
287 |
+
" 'Dependents_Yes',\n",
|
288 |
+
" 'PhoneService_No',\n",
|
289 |
+
" 'PhoneService_Yes',\n",
|
290 |
+
" 'MultipleLines_No',\n",
|
291 |
+
" 'MultipleLines_Yes',\n",
|
292 |
+
" 'InternetService_DSL',\n",
|
293 |
+
" 'InternetService_Fiber optic',\n",
|
294 |
+
" 'InternetService_No',\n",
|
295 |
+
" 'OnlineSecurity_No',\n",
|
296 |
+
" 'OnlineSecurity_Yes',\n",
|
297 |
+
" 'OnlineBackup_No',\n",
|
298 |
+
" 'OnlineBackup_Yes',\n",
|
299 |
+
" 'DeviceProtection_No',\n",
|
300 |
+
" 'DeviceProtection_Yes',\n",
|
301 |
+
" 'TechSupport_No',\n",
|
302 |
+
" 'TechSupport_Yes',\n",
|
303 |
+
" 'StreamingTV_No',\n",
|
304 |
+
" 'StreamingTV_Yes',\n",
|
305 |
+
" 'StreamingMovies_No',\n",
|
306 |
+
" 'StreamingMovies_Yes',\n",
|
307 |
+
" 'Contract_Month-to-month',\n",
|
308 |
+
" 'Contract_One year',\n",
|
309 |
+
" 'Contract_Two year',\n",
|
310 |
+
" 'PaperlessBilling_No',\n",
|
311 |
+
" 'PaperlessBilling_Yes',\n",
|
312 |
+
" 'PaymentMethod_Bank transfer (automatic)',\n",
|
313 |
+
" 'PaymentMethod_Credit card (automatic)',\n",
|
314 |
+
" 'PaymentMethod_Electronic check',\n",
|
315 |
+
" 'PaymentMethod_Mailed check']"
|
316 |
+
]
|
317 |
+
},
|
318 |
+
"execution_count": 10,
|
319 |
+
"metadata": {},
|
320 |
+
"output_type": "execute_result"
|
321 |
+
}
|
322 |
+
],
|
323 |
+
"source": [
|
324 |
+
"X_train_df.columns.tolist()"
|
325 |
+
]
|
326 |
+
},
|
327 |
+
{
|
328 |
+
"cell_type": "code",
|
329 |
+
"execution_count": 8,
|
330 |
+
"id": "d53e6b9e",
|
331 |
+
"metadata": {},
|
332 |
+
"outputs": [
|
333 |
+
{
|
334 |
+
"name": "stdout",
|
335 |
+
"output_type": "stream",
|
336 |
+
"text": [
|
337 |
+
"Column 'gender' categories: ['Female' 'Male']\n",
|
338 |
+
"Column 'SeniorCitizen' categories: [0 1]\n",
|
339 |
+
"Column 'Partner' categories: ['Yes' 'No']\n",
|
340 |
+
"Column 'Dependents' categories: ['No' 'Yes']\n",
|
341 |
+
"Column 'tenure' categories: [ 1 34 2 45 8 22 10 28 62 13 16 58 49 25 69 52 71 21 12 30 47 72 17 27\n",
|
342 |
+
" 5 46 11 70 63 43 15 60 18 66 9 3 31 50 64 56 7 42 35 48 29 65 38 68\n",
|
343 |
+
" 32 55 37 36 41 6 4 33 67 23 57 61 14 20 53 40 59 24 44 19 54 51 26 0\n",
|
344 |
+
" 39]\n",
|
345 |
+
"Column 'PhoneService' categories: ['No' 'Yes']\n",
|
346 |
+
"Column 'MultipleLines' categories: ['No' 'Yes']\n",
|
347 |
+
"Column 'InternetService' categories: ['DSL' 'Fiber optic' 'No']\n",
|
348 |
+
"Column 'OnlineSecurity' categories: ['No' 'Yes']\n",
|
349 |
+
"Column 'OnlineBackup' categories: ['Yes' 'No']\n",
|
350 |
+
"Column 'DeviceProtection' categories: ['No' 'Yes']\n",
|
351 |
+
"Column 'TechSupport' categories: ['No' 'Yes']\n",
|
352 |
+
"Column 'StreamingTV' categories: ['No' 'Yes']\n",
|
353 |
+
"Column 'StreamingMovies' categories: ['No' 'Yes']\n",
|
354 |
+
"Column 'Contract' categories: ['Month-to-month' 'One year' 'Two year']\n",
|
355 |
+
"Column 'PaperlessBilling' categories: ['Yes' 'No']\n",
|
356 |
+
"Column 'PaymentMethod' categories: ['Electronic check' 'Mailed check' 'Bank transfer (automatic)'\n",
|
357 |
+
" 'Credit card (automatic)']\n",
|
358 |
+
"Column 'MonthlyCharges' categories: [29.85 56.95 53.85 ... 63.1 44.2 78.7 ]\n",
|
359 |
+
"Column 'TotalCharges' categories: [ 29.85 1889.5 108.15 ... 346.45 306.6 6844.5 ]\n"
|
360 |
+
]
|
361 |
+
}
|
362 |
+
],
|
363 |
+
"source": [
|
364 |
+
"for col in X.columns:\n",
|
365 |
+
" print(f\"Column '{col}' categories: {X[col].unique()}\")"
|
366 |
+
]
|
367 |
+
},
|
368 |
+
{
|
369 |
+
"cell_type": "code",
|
370 |
+
"execution_count": 3,
|
371 |
+
"id": "b6f7708a",
|
372 |
+
"metadata": {},
|
373 |
+
"outputs": [
|
374 |
+
{
|
375 |
+
"name": "stdout",
|
376 |
+
"output_type": "stream",
|
377 |
+
"text": [
|
378 |
+
"Best model: Random Forest Classifier\n"
|
379 |
+
]
|
380 |
+
}
|
381 |
+
],
|
382 |
+
"source": [
|
383 |
+
"best_model_name = 'Random Forest Classifier'\n",
|
384 |
+
"\n",
|
385 |
+
"best_model = models[best_model_name]\n",
|
386 |
+
"\n",
|
387 |
+
"print(f'Best model: {best_model_name}')"
|
388 |
+
]
|
389 |
+
},
|
390 |
+
{
|
391 |
+
"cell_type": "code",
|
392 |
+
"execution_count": 9,
|
393 |
+
"id": "2adb8c7e",
|
394 |
+
"metadata": {},
|
395 |
+
"outputs": [
|
396 |
+
{
|
397 |
+
"name": "stdout",
|
398 |
+
"output_type": "stream",
|
399 |
+
"text": [
|
400 |
+
" precision recall f1-score support\n",
|
401 |
+
"\n",
|
402 |
+
" No 0.85 0.86 0.86 1053\n",
|
403 |
+
" Yes 0.57 0.56 0.56 352\n",
|
404 |
+
"\n",
|
405 |
+
" accuracy 0.78 1405\n",
|
406 |
+
" macro avg 0.71 0.71 0.71 1405\n",
|
407 |
+
"weighted avg 0.78 0.78 0.78 1405\n",
|
408 |
+
"\n"
|
409 |
+
]
|
410 |
+
}
|
411 |
+
],
|
412 |
+
"source": [
|
413 |
+
"# Calculate the class weights\n",
|
414 |
+
"class_weight = {\"No\": 1, \"Yes\": 10}\n",
|
415 |
+
"\n",
|
416 |
+
"# Initialize Logistic Regression model with class weights\n",
|
417 |
+
"rf = RandomForestClassifier(class_weight=class_weight)\n",
|
418 |
+
"\n",
|
419 |
+
"# Fit the model to the training data\n",
|
420 |
+
"rf.fit(X_train_resampled, y_train_resampled)\n",
|
421 |
+
"\n",
|
422 |
+
"# Predict the labels of the test set\n",
|
423 |
+
"y_pred = rf.predict(X_test_df)\n",
|
424 |
+
"\n",
|
425 |
+
"# Generate the classification report\n",
|
426 |
+
"report = classification_report(y_test, y_pred)\n",
|
427 |
+
"print(report)"
|
428 |
+
]
|
429 |
+
},
|
430 |
+
{
|
431 |
+
"cell_type": "code",
|
432 |
+
"execution_count": 4,
|
433 |
+
"id": "3ca066e7",
|
434 |
+
"metadata": {
|
435 |
+
"scrolled": true
|
436 |
+
},
|
437 |
+
"outputs": [],
|
438 |
+
"source": [
|
439 |
+
"from joblib import dump\n",
|
440 |
+
"import os\n",
|
441 |
+
"\n",
|
442 |
+
"# set the destination path to the \"export\" directory\n",
|
443 |
+
"destination = \".\"\n",
|
444 |
+
"\n",
|
445 |
+
"# create a dictionary to store the objects and their filenames\n",
|
446 |
+
"models = {\"numerical_imputer\": numerical_imputer,\n",
|
447 |
+
" \"categorical_imputer\": categorical_imputer,\n",
|
448 |
+
" \"encoder\": encoder,\n",
|
449 |
+
" \"scaler\": scaler,\n",
|
450 |
+
" \"Final_model\": best_model}\n",
|
451 |
+
"\n",
|
452 |
+
"# loop through the models and save them using joblib.dump()\n",
|
453 |
+
"for name, model in models.items():\n",
|
454 |
+
" dump(model, os.path.join(destination, f\"{name}.joblib\"))\n"
|
455 |
+
]
|
456 |
+
},
|
457 |
+
{
|
458 |
+
"cell_type": "code",
|
459 |
+
"execution_count": 10,
|
460 |
+
"id": "2185d2f9",
|
461 |
+
"metadata": {},
|
462 |
+
"outputs": [],
|
463 |
+
"source": [
|
464 |
+
"#!pip freeze > requirements.txt"
|
465 |
+
]
|
466 |
+
},
|
467 |
+
{
|
468 |
+
"cell_type": "code",
|
469 |
+
"execution_count": 6,
|
470 |
+
"id": "8117c959",
|
471 |
+
"metadata": {},
|
472 |
+
"outputs": [
|
473 |
+
{
|
474 |
+
"name": "stderr",
|
475 |
+
"output_type": "stream",
|
476 |
+
"text": [
|
477 |
+
"INFO: Successfully saved requirements file in .\\requirements.txt\n"
|
478 |
+
]
|
479 |
+
}
|
480 |
+
],
|
481 |
+
"source": [
|
482 |
+
"!pipreqs . --force"
|
483 |
+
]
|
484 |
+
},
|
485 |
+
{
|
486 |
+
"cell_type": "code",
|
487 |
+
"execution_count": 11,
|
488 |
+
"id": "33af820b",
|
489 |
+
"metadata": {},
|
490 |
+
"outputs": [],
|
491 |
+
"source": [
|
492 |
+
"#!pip list --format=freeze > requirements.txt"
|
493 |
+
]
|
494 |
+
},
|
495 |
+
{
|
496 |
+
"cell_type": "code",
|
497 |
+
"execution_count": 5,
|
498 |
+
"id": "816b3fe9",
|
499 |
+
"metadata": {},
|
500 |
+
"outputs": [
|
501 |
+
{
|
502 |
+
"name": "stdout",
|
503 |
+
"output_type": "stream",
|
504 |
+
"text": [
|
505 |
+
"numerical_imputer saved successfully!\n",
|
506 |
+
"categorical_imputer saved successfully!\n",
|
507 |
+
"encoder saved successfully!\n",
|
508 |
+
"scaler saved successfully!\n",
|
509 |
+
"Final_model saved successfully!\n"
|
510 |
+
]
|
511 |
+
}
|
512 |
+
],
|
513 |
+
"source": [
|
514 |
+
"for name, model in models.items():\n",
|
515 |
+
" dump(model, os.path.join(destination, f\"{name}.joblib\"))\n",
|
516 |
+
" if os.path.exists(os.path.join(destination, f\"{name}.joblib\")):\n",
|
517 |
+
" print(f\"{name} saved successfully!\")\n",
|
518 |
+
" else:\n",
|
519 |
+
" print(f\"{name} failed to save.\")\n"
|
520 |
+
]
|
521 |
+
},
|
522 |
+
{
|
523 |
+
"cell_type": "code",
|
524 |
+
"execution_count": 90,
|
525 |
+
"id": "5143eadb",
|
526 |
+
"metadata": {},
|
527 |
+
"outputs": [],
|
528 |
+
"source": [
|
529 |
+
"destination = \".\"\n",
|
530 |
+
"numerical_imputer = joblib.load(os.path.join(destination, \"numerical_imputer.joblib\"))\n",
|
531 |
+
"categorical_imputer = joblib.load(os.path.join(destination, \"categorical_imputer.joblib\"))\n",
|
532 |
+
"encoder = joblib.load(os.path.join(destination, \"encoder.joblib\"))\n",
|
533 |
+
"scaler = joblib.load(os.path.join(destination, \"scaler.joblib\"))\n",
|
534 |
+
"best_model = joblib.load(os.path.join(destination, \"Final_model.joblib\"))\n",
|
535 |
+
"\n",
|
536 |
+
"loaded_models = {\"numerical_imputer\": numerical_imputer,\n",
|
537 |
+
" \"categorical_imputer\": categorical_imputer,\n",
|
538 |
+
" \"encoder\": encoder,\n",
|
539 |
+
" \"scaler\": scaler,\n",
|
540 |
+
" \"Final_model\": best_model}\n"
|
541 |
+
]
|
542 |
+
}
|
543 |
+
],
|
544 |
+
"metadata": {
|
545 |
+
"kernelspec": {
|
546 |
+
"display_name": "Python 3 (ipykernel)",
|
547 |
+
"language": "python",
|
548 |
+
"name": "python3"
|
549 |
+
},
|
550 |
+
"language_info": {
|
551 |
+
"codemirror_mode": {
|
552 |
+
"name": "ipython",
|
553 |
+
"version": 3
|
554 |
+
},
|
555 |
+
"file_extension": ".py",
|
556 |
+
"mimetype": "text/x-python",
|
557 |
+
"name": "python",
|
558 |
+
"nbconvert_exporter": "python",
|
559 |
+
"pygments_lexer": "ipython3",
|
560 |
+
"version": "3.9.12"
|
561 |
+
}
|
562 |
+
},
|
563 |
+
"nbformat": 4,
|
564 |
+
"nbformat_minor": 5
|
565 |
+
}
|
README.md
ADDED
@@ -0,0 +1,77 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# churn-prediction-with-gradio
|
2 |
+
This repository contains code and resources for building a churn prediction model using machine learning techniques, and deploying it with Gradio for a user-friendly interface. Gradio is used to create a web interface for the trained model, which allows users to input customer data and get predictions on their likelihood of churning.
|
3 |
+
|
4 |
+
## Summary
|
5 |
+
| Code | Name | Published Article | Deployed App |
|
6 |
+
|-----------|-------------|:-------------:|------:|
|
7 |
+
| LP4 | Churn Prediction with Gradio | [-](/) | [-](/) |
|
8 |
+
|
9 |
+
## Project Description
|
10 |
+
Churn prediction is a critical task for businesses that want to retain their customers and optimize revenue. This repository contains code and resources for building a churn prediction model using machine learning techniques, and deploying it with Gradio for a user-friendly interface.
|
11 |
+
|
12 |
+
The code includes data preprocessing, feature engineering, model training, and evaluation using Python and popular machine learning libraries such as Scikit-learn and XGBoost. The trained model is then deployed using Gradio, which allows users to input customer data and get predictions on their likelihood of churning. The Gradio interface is intuitive and easy to use, even for non-technical users.
|
13 |
+
|
14 |
+
The repository includes a demo notebook that showcases how to use the trained model in the Gradio interface, as well as instructions for reproducing the project. This project can be useful for anyone interested in learning how to build a churn prediction model and deploy it with Gradio.
|
15 |
+
|
16 |
+
## Setup
|
17 |
+
|
18 |
+
## Installation
|
19 |
+
Download or Clone the repository and navigate to the project directory. Clone this repository to your local machine using the following command:
|
20 |
+
|
21 |
+
git clone -
|
22 |
+
|
23 |
+
Alternatively, you can visit:
|
24 |
+
|
25 |
+
-
|
26 |
+
|
27 |
+
|
28 |
+
## Install the dependencies
|
29 |
+
|
30 |
+
Navigate to the cloned repository and run the command:
|
31 |
+
|
32 |
+
pip install -r requirements.txt
|
33 |
+
|
34 |
+
## App Execution
|
35 |
+
|
36 |
+
|
37 |
+
|
38 |
+

|
39 |
+
|
40 |
+
First, select the gender and then select whether he/she is a senior citizen. A key is provided indicating that 0 is for NO and 1 is for YES. Also choose if the customer has a partner.
|
41 |
+
|
42 |
+
|
43 |
+
|
44 |
+

|
45 |
+
|
46 |
+
Select if the customer has any dependents.
|
47 |
+
|
48 |
+
|
49 |
+

|
50 |
+
|
51 |
+
|
52 |
+
Next, input the length of the tenure in months, then select if the customer has the following: PhoneService, MultipleLines, InternetService, OnlineSecurity and OnlineBackup.
|
53 |
+
|
54 |
+
|
55 |
+
|
56 |
+

|
57 |
+
|
58 |
+
Next, choose if the customer has the following: DeviceProtection, TechSupport, StreamingTV and StreamingMovies.
|
59 |
+
|
60 |
+
|
61 |
+
|
62 |
+

|
63 |
+
|
64 |
+
Select if the customer prefers paperless billing. Also select the payment method, and enter the monthly charges together with the total charges.
|
65 |
+
|
66 |
+
|
67 |
+

|
68 |
+
|
69 |
+
Lastly, submit the values and click on the predict button to get the prediction.
|
70 |
+
|
71 |
+

|
72 |
+
|
73 |
+
|
74 |
+
|
75 |
+
## Author
|
76 |
+
Alberta Cofie
|
77 |
+
Data Analyst
|
Telco-Customer-Churn.csv
ADDED
The diff for this file is too large to render.
See raw diff
|
|
categorical_imputer.joblib
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6f017356e7d2e4095809832b8b94a33457fff3440bc28ae419d0f0e9542743e1
|
3 |
+
size 1367
|
encoder.joblib
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1f140674e233a2e46ba9bad22a7b106f0f0294488282e86a3985b1b2e1e66ea6
|
3 |
+
size 4472
|
numerical_imputer.joblib
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5dfe94a5d59b51f5e246ced672cd89d1c5d16209c7494aba20adf0160c8cd2d6
|
3 |
+
size 854
|
requirements.txt
ADDED
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
gradio==3.24.1
|
2 |
+
numpy==1.24.2
|
3 |
+
pandas==1.5.3
|
4 |
+
Pillow==9.5.0
|
5 |
+
scikit_learn==1.2.2
|
scaler.joblib
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7f46a4b4768cdf9f12bce8640ed160cc306d8631ce5b9424ac42e1f7855d5fdb
|
3 |
+
size 632
|