Spaces:
Sleeping
Sleeping
ErenKontas
committed on
Upload 6 files
Browse files
- Veri.pkl +3 -0
- app.py +101 -0
- dataset.csv +0 -0
- encoder.pkl +3 -0
- requirements.txt +4 -0
- scaler.pkl +3 -0
Veri.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:805efaf38201cbb3716af3195913dd1cc6e1f5dd04859e53f2ae115b0c15f8b4
|
3 |
+
size 8980
|
app.py
ADDED
@@ -0,0 +1,101 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import pandas as pd
|
2 |
+
import streamlit as st
|
3 |
+
import joblib
|
4 |
+
|
5 |
+
from sklearn.preprocessing import StandardScaler, OneHotEncoder
|
6 |
+
from sklearn.compose import ColumnTransformer
|
7 |
+
|
8 |
+
# Load data and update column names
|
9 |
+
# Columns coerced to integers / floats below; together (in this order) they
# are also the numeric features handed to the StandardScaler.
INT_COLUMNS = ['Monthly_Period', 'Credit1', 'InstallmentRate', 'Tenancy_Period', 'Age', 'Credits', 'Authorities']
FLOAT_COLUMNS = ['InstallmentCredit', 'Yearly_Period']

# Columns that are one-hot encoded.
CATEGORICAL_COLUMNS = [
    "Account1", "History", "Motive", "Account2", "Employment_Period",
    "Gender_Type", "Sponsors", "Plotsize", "Plan", "Housing", "Post",
    "Phone", "Expatriate",
]

# Read the dataset and discard the 'id' and 'Class' columns.
df = pd.read_csv('dataset.csv').drop(columns=['id', 'Class'])

# Normalise column names: whitespace and dots become underscores,
# 'Gender&Type' becomes 'Gender_Type', and '#' characters are removed.
df.columns = (
    df.columns
    .str.replace(r'[\s\.]', '_', regex=True)
    .str.replace(r'Gender&Type', 'Gender_Type', regex=True)
    .str.replace(r'#', '', regex=True)
)

# Force numeric dtypes; values that cannot be parsed become NaN and are
# then replaced by 0 before the final cast.
df[INT_COLUMNS] = df[INT_COLUMNS].apply(pd.to_numeric, errors='coerce').fillna(0).astype(int)
df[FLOAT_COLUMNS] = df[FLOAT_COLUMNS].apply(pd.to_numeric, errors='coerce').fillna(0).astype(float)

# Feature frame (same object as df; no separate target is kept).
x = df

# Preprocessing: standardise numeric columns, one-hot encode categorical ones.
preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), INT_COLUMNS + FLOAT_COLUMNS),
        ('cat', OneHotEncoder(), CATEGORICAL_COLUMNS),
    ]
)
|
28 |
+
|
29 |
+
# Streamlit application
|
30 |
+
def cluster_pred(Account1, Monthly_Period, History, Motive, Credit1, Account2, Employment_Period, InstallmentRate, Gender_Type, Sponsors, Tenancy_Period,
                 Plotsize, Age, Plan, Housing, Credits, Post, Authorities, Phone, Expatriate, InstallmentCredit, Yearly_Period):
    """Predict the cluster of a single record with the model stored in 'Veri.pkl'.

    Each argument is the value of the dataset column of the same name. Which
    columns are standardised and which are one-hot encoded is decided by the
    module-level ``preprocessor``.

    The preprocessor is fitted on the module-level dataset ``x`` and then only
    applied to the input row. Fitting it on the single input row (as was done
    before) derives the scaling statistics and the one-hot categories from
    that one row, so the resulting features cannot match the ones the model
    was trained on.

    NOTE(review): this assumes the saved model was trained on the output of
    this same preprocessor fitted on this dataset -- confirm against the
    training code.

    Returns:
        float: the first element of ``model.predict`` for the input row.

    Raises:
        ValueError: from the one-hot encoder if a categorical value was not
            present in the dataset (default ``handle_unknown='error'``).
    """
    input_data = pd.DataFrame({
        'Account1': [Account1],
        'Monthly_Period': [Monthly_Period],
        'History': [History],
        'Motive': [Motive],
        'Credit1': [Credit1],
        'Account2': [Account2],
        'Employment_Period': [Employment_Period],
        'InstallmentRate': [InstallmentRate],
        'Gender_Type': [Gender_Type],
        'Sponsors': [Sponsors],
        'Tenancy_Period': [Tenancy_Period],
        'Plotsize': [Plotsize],
        'Age': [Age],
        'Plan': [Plan],
        'Housing': [Housing],
        'Credits': [Credits],
        'Post': [Post],
        'Authorities': [Authorities],
        'Phone': [Phone],
        'Expatriate': [Expatriate],
        'InstallmentCredit': [InstallmentCredit],
        'Yearly_Period': [Yearly_Period],
    })

    model = joblib.load('Veri.pkl')

    # Fit on the dataset restricted to (and ordered like) the input columns so
    # the fit-time and transform-time frames have identical feature names.
    preprocessor.fit(x[list(input_data.columns)])
    input_data_transformed = preprocessor.transform(input_data)

    prediction = model.predict(input_data_transformed)
    return float(prediction[0])
|
71 |
+
|
72 |
+
st.title("KMeans Clustering Model")
st.write("Enter Input Data to Predict Cluster")


def _choice(column):
    """Select box listing every distinct value of `column` in the dataset."""
    return st.selectbox(column, df[column].unique())


def _int_slider(column):
    """Integer slider spanning the dataset's min..max of `column`."""
    return st.slider(column, int(df[column].min()), int(df[column].max()))


def _float_slider(column):
    """Float slider spanning the dataset's min..max of `column`."""
    return st.slider(column, float(df[column].min()), float(df[column].max()))


# Widgets are created in the same order as the arguments of cluster_pred.
Account1 = _choice('Account1')
Monthly_Period = _int_slider('Monthly_Period')
History = _choice('History')
Motive = _choice('Motive')
Credit1 = _int_slider('Credit1')
Account2 = _choice('Account2')
Employment_Period = _choice('Employment_Period')
InstallmentRate = _int_slider('InstallmentRate')
Gender_Type = _choice('Gender_Type')
Sponsors = _choice('Sponsors')
Tenancy_Period = _int_slider('Tenancy_Period')
Plotsize = _choice('Plotsize')
Age = _int_slider('Age')
Plan = _choice('Plan')
Housing = _choice('Housing')
# Credits and Authorities are cast to int when the dataset is loaded, so they
# get integer sliders like the other integer columns.
Credits = _int_slider('Credits')
Post = _choice('Post')
Authorities = _int_slider('Authorities')
Phone = _choice('Phone')
# Offer the values actually present in the dataset (instead of a hard-coded
# True/False pair) so the choice always matches what the encoder can be
# fitted on.
Expatriate = _choice('Expatriate')
InstallmentCredit = _float_slider('InstallmentCredit')
Yearly_Period = _float_slider('Yearly_Period')

if st.button('Predict Cluster'):
    cluster = cluster_pred(Account1, Monthly_Period, History, Motive, Credit1, Account2, Employment_Period, InstallmentRate, Gender_Type, Sponsors, Tenancy_Period,
                           Plotsize, Age, Plan, Housing, Credits, Post, Authorities, Phone, Expatriate, InstallmentCredit, Yearly_Period)
    st.write(f'The predicted cluster is: {cluster}')
|
dataset.csv
ADDED
The diff for this file is too large to render.
See raw diff
|
|
encoder.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d2003d9a0f3831c59a10a17d937a1eb114d2a78ccab82d964ffe4e0ff52499d2
|
3 |
+
size 271
|
requirements.txt
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
streamlit
|
2 |
+
scikit-learn
|
3 |
+
pandas
|
4 |
+
tensorflow
|
scaler.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c85f548747e9516323485c728c301bebac9fbdde5309a017de33739ddb6514ab
|
3 |
+
size 129
|