ErenKontas committed on
Commit
b211d1e
·
verified ·
1 Parent(s): 2f23acb

Upload 6 files

Browse files
Files changed (6) hide show
  1. Veri.pkl +3 -0
  2. app.py +101 -0
  3. dataset.csv +0 -0
  4. encoder.pkl +3 -0
  5. requirements.txt +4 -0
  6. scaler.pkl +3 -0
Veri.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:805efaf38201cbb3716af3195913dd1cc6e1f5dd04859e53f2ae115b0c15f8b4
3
+ size 8980
app.py ADDED
@@ -0,0 +1,101 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import streamlit as st
3
+ import joblib
4
+
5
+ from sklearn.preprocessing import StandardScaler, OneHotEncoder
6
+ from sklearn.compose import ColumnTransformer
7
+
8
+ # Load data and update column names
9
# Read the raw dataset and discard the identifier and label columns.
df = pd.read_csv('dataset.csv').drop(columns=['id', 'Class'])

# Normalise the headers, in this order: whitespace and dots become
# underscores, 'Gender&Type' becomes 'Gender_Type', '#' is removed.
for pattern, replacement in (
    (r'[\s\.]', '_'),
    (r'Gender&Type', 'Gender_Type'),
    (r'#', ''),
):
    df.columns = df.columns.str.replace(pattern, replacement, regex=True)

# Columns stored as integers; unparseable entries are coerced to NaN and
# then replaced by 0 before the cast.
integer_columns = ['Monthly_Period', 'Credit1', 'InstallmentRate', 'Tenancy_Period', 'Age', 'Credits', 'Authorities']
as_numbers = df[integer_columns].apply(pd.to_numeric, errors='coerce')
df[integer_columns] = as_numbers.fillna(0).astype(int)

# Columns stored as floats, cleaned the same way.
float_columns = ['InstallmentCredit', 'Yearly_Period']
as_numbers = df[float_columns].apply(pd.to_numeric, errors='coerce')
df[float_columns] = as_numbers.fillna(0).astype(float)

# Every remaining column is used as a feature; `x` is another name for the
# same frame, not a copy.
x = df
20
+
21
+ # Preprocessing (StandardScaler for numeric columns, OneHotEncoder for categorical columns)
22
# Columns that are standardised, in the order they are handed to the scaler.
numeric_features = [
    "Monthly_Period", "Credit1", "InstallmentRate", "Tenancy_Period", "Age",
    "Credits", "Authorities", "InstallmentCredit", "Yearly_Period",
]
# Columns that are one-hot encoded, in the order they are handed to the encoder.
categorical_features = [
    "Account1", "History", "Motive", "Account2", "Employment_Period",
    "Gender_Type", "Sponsors", "Plotsize", "Plan", "Housing", "Post",
    "Phone", "Expatriate",
]

# Unfitted transformer: scaled numeric output comes first ('num'), followed by
# the one-hot encoded categorical output ('cat').
preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), numeric_features),
        ('cat', OneHotEncoder(), categorical_features),
    ]
)
28
+
29
+ # Streamlit application
30
def cluster_pred(Account1, Monthly_Period, History, Motive, Credit1, Account2, Employment_Period, InstallmentRate, Gender_Type, Sponsors, Tenancy_Period,
                 Plotsize, Age, Plan, Housing, Credits, Post, Authorities, Phone, Expatriate, InstallmentCredit, Yearly_Period):
    """Predict the cluster of a single record built from the form inputs.

    Each argument is the value of the dataset column with the same name.
    The values are assembled into a one-row DataFrame, pushed through the
    module-level ``preprocessor`` and handed to the model stored in
    ``Veri.pkl``.

    Returns:
        The first element of the model's prediction, converted to ``float``.

    Raises:
        ValueError: presumably when a categorical value was not present in
            the reference data ``x`` (the encoder is created with its
            default unknown-category handling) - verify against the
            installed scikit-learn version.
    """
    input_data = pd.DataFrame({
        'Account1': [Account1],
        'Monthly_Period': [Monthly_Period],
        'History': [History],
        'Motive': [Motive],
        'Credit1': [Credit1],
        'Account2': [Account2],
        'Employment_Period': [Employment_Period],
        'InstallmentRate': [InstallmentRate],
        'Gender_Type': [Gender_Type],
        'Sponsors': [Sponsors],
        'Tenancy_Period': [Tenancy_Period],
        'Plotsize': [Plotsize],
        'Age': [Age],
        'Plan': [Plan],
        'Housing': [Housing],
        'Credits': [Credits],
        'Post': [Post],
        'Authorities': [Authorities],
        'Phone': [Phone],
        'Expatriate': [Expatriate],
        'InstallmentCredit': [InstallmentCredit],
        'Yearly_Period': [Yearly_Period],
    })

    # NOTE(security): joblib.load unpickles the file, which can execute
    # arbitrary code - only ship a Veri.pkl from a trusted source.
    model = joblib.load('Veri.pkl')

    # Fit on the full reference frame, then only *transform* the input row.
    # Fitting on the single row (as before) standardises every numeric value
    # to 0 and yields one one-hot column per categorical feature, so the
    # result cannot line up with the features the model was trained on.
    # The earlier get_dummies / unfitted StandardScaler path was removed: it
    # raised NotFittedError and its result was overwritten anyway.
    # NOTE(review): assumes the model was trained on
    # preprocessor.fit_transform(x) - confirm against the training script.
    preprocessor.fit(x)
    input_data_transformed = preprocessor.transform(input_data)

    prediction = model.predict(input_data_transformed)
    return float(prediction[0])
71
+
72
st.title("KMeans Clustering Model")
st.write("Enter Input Data to Predict Cluster")


def _select(column):
    """Render a select box offering every distinct value of ``df[column]``."""
    return st.selectbox(column, df[column].unique())


def _int_slider(column):
    """Render an integer slider spanning the min..max of ``df[column]``."""
    return st.slider(column, int(df[column].min()), int(df[column].max()))


def _float_slider(column):
    """Render a float slider spanning the min..max of ``df[column]``."""
    return st.slider(column, float(df[column].min()), float(df[column].max()))


# Widgets are created in the order they should appear on the page.
Account1 = _select('Account1')
Monthly_Period = _int_slider('Monthly_Period')
History = _select('History')
Motive = _select('Motive')
Credit1 = _int_slider('Credit1')
Account2 = _select('Account2')
Employment_Period = _select('Employment_Period')
InstallmentRate = _int_slider('InstallmentRate')
Gender_Type = _select('Gender_Type')
Sponsors = _select('Sponsors')
Tenancy_Period = _int_slider('Tenancy_Period')
Plotsize = _select('Plotsize')
Age = _int_slider('Age')
Plan = _select('Plan')
Housing = _select('Housing')
# Credits and Authorities are cast to int when the dataset is loaded, so they
# get integer sliders like the other integer columns (no fractional counts).
Credits = _int_slider('Credits')
Post = _select('Post')
Authorities = _int_slider('Authorities')
Phone = _select('Phone')
# Options come from the dataset, like every other categorical input, so the
# selected value is always one that actually occurs in that column.
Expatriate = _select('Expatriate')
InstallmentCredit = _float_slider('InstallmentCredit')
Yearly_Period = _float_slider('Yearly_Period')

if st.button('Predict Cluster'):
    cluster = cluster_pred(Account1, Monthly_Period, History, Motive, Credit1, Account2, Employment_Period, InstallmentRate, Gender_Type, Sponsors, Tenancy_Period,
                           Plotsize, Age, Plan, Housing, Credits, Post, Authorities, Phone, Expatriate, InstallmentCredit, Yearly_Period)
    st.write(f'The predicted cluster is: {cluster}')
dataset.csv ADDED
The diff for this file is too large to render. See raw diff
 
encoder.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d2003d9a0f3831c59a10a17d937a1eb114d2a78ccab82d964ffe4e0ff52499d2
3
+ size 271
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
streamlit
scikit-learn
pandas
# app.py imports joblib directly, so declare it explicitly.
joblib
# NOTE(review): tensorflow is not imported by app.py - confirm it is needed.
tensorflow
scaler.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c85f548747e9516323485c728c301bebac9fbdde5309a017de33739ddb6514ab
3
+ size 129