# Veri / app.py
# ErenKontas's picture
# Upload 6 files
# b211d1e verified
import pandas as pd
import streamlit as st
import joblib
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
# Read the dataset and drop the 'id' and 'Class' columns.
df = pd.read_csv('dataset.csv').drop(columns=['id', 'Class'])

# Clean up the column names in one pass: whitespace and dots become
# underscores, 'Gender&Type' becomes 'Gender_Type', and '#' is removed.
df.columns = (
    df.columns
    .str.replace(r'[\s\.]', '_', regex=True)
    .str.replace(r'Gender&Type', 'Gender_Type', regex=True)
    .str.replace(r'#', '', regex=True)
)

# Columns grouped by the dtype they are coerced to.
integer_columns = ['Monthly_Period', 'Credit1', 'InstallmentRate', 'Tenancy_Period', 'Age', 'Credits', 'Authorities']
float_columns = ['InstallmentCredit', 'Yearly_Period']

# Values that cannot be parsed as numbers become NaN, are replaced by 0,
# and the columns are then cast to their target dtype.
for target_dtype, column_group in ((int, integer_columns), (float, float_columns)):
    df[column_group] = (
        df[column_group]
        .apply(pd.to_numeric, errors='coerce')
        .fillna(0)
        .astype(target_dtype)
    )
# Every remaining column of the dataset is used as a model input.
x = df

# Columns routed to each transformer of the preprocessing step.
numeric_features = [
    "Monthly_Period", "Credit1", "InstallmentRate", "Tenancy_Period", "Age",
    "Credits", "Authorities", "InstallmentCredit", "Yearly_Period",
]
categorical_features = [
    "Account1", "History", "Motive", "Account2", "Employment_Period",
    "Gender_Type", "Sponsors", "Plotsize", "Plan", "Housing", "Post",
    "Phone", "Expatriate",
]

# Preprocessing: standardise the numeric columns and one-hot encode the
# categorical ones.
preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), numeric_features),
        ('cat', OneHotEncoder(), categorical_features),
    ]
)
# Streamlit application
def cluster_pred(Account1, Monthly_Period, History, Motive, Credit1, Account2, Employment_Period, InstallmentRate, Gender_Type, Sponsors, Tenancy_Period,
                 Plotsize, Age, Plan, Housing, Credits, Post, Authorities, Phone, Expatriate, InstallmentCredit, Yearly_Period):
    """Predict the cluster of a single record.

    Each argument is the value of the feature column with the same name.
    The values are assembled into a one-row DataFrame, run through the
    module-level ``preprocessor`` and passed to the model stored in
    ``Veri.pkl``.

    Returns:
        The model's prediction for the row, converted to ``float``.
    """
    input_data = pd.DataFrame({
        'Account1': [Account1],
        'Monthly_Period': [Monthly_Period],
        'History': [History],
        'Motive': [Motive],
        'Credit1': [Credit1],
        'Account2': [Account2],
        'Employment_Period': [Employment_Period],
        'InstallmentRate': [InstallmentRate],
        'Gender_Type': [Gender_Type],
        'Sponsors': [Sponsors],
        'Tenancy_Period': [Tenancy_Period],
        'Plotsize': [Plotsize],
        'Age': [Age],
        'Plan': [Plan],
        'Housing': [Housing],
        'Credits': [Credits],
        'Post': [Post],
        'Authorities': [Authorities],
        'Phone': [Phone],
        'Expatriate': [Expatriate],
        'InstallmentCredit': [InstallmentCredit],
        'Yearly_Period': [Yearly_Period],
    })

    # NOTE(review): joblib.load unpickles the file, which can execute
    # arbitrary code; only ship a 'Veri.pkl' from a trusted source.
    model = joblib.load('Veri.pkl')

    # The transformers must learn their statistics from the reference dataset
    # `x`, not from the single input row: fitting on one row would scale every
    # numeric value to 0 and yield a single one-hot column per categorical
    # feature, so the feature matrix would not match the model's input width.
    # NOTE(review): assumes the model was trained on features produced by this
    # same preprocessor fitted on the dataset — TODO confirm against training.
    preprocessor.fit(x)
    input_data_transformed = preprocessor.transform(input_data)

    prediction = model.predict(input_data_transformed)
    return float(prediction[0])
st.title("KMeans Clustering Model")
st.write("Enter Input Data to Predict Cluster")


def _select_from_column(column):
    """Render a select box offering the distinct values of ``df[column]``."""
    return st.selectbox(column, df[column].unique())


def _int_slider(column):
    """Render a slider spanning the integer min/max of ``df[column]``."""
    lower = int(df[column].min())
    upper = int(df[column].max())
    return st.slider(column, lower, upper)


def _float_slider(column):
    """Render a slider spanning the float min/max of ``df[column]``."""
    lower = float(df[column].min())
    upper = float(df[column].max())
    return st.slider(column, lower, upper)


# Widgets are created in the order they should appear on the page.
Account1 = _select_from_column('Account1')
Monthly_Period = _int_slider('Monthly_Period')
History = _select_from_column('History')
Motive = _select_from_column('Motive')
Credit1 = _int_slider('Credit1')
Account2 = _select_from_column('Account2')
Employment_Period = _select_from_column('Employment_Period')
InstallmentRate = _int_slider('InstallmentRate')
Gender_Type = _select_from_column('Gender_Type')
Sponsors = _select_from_column('Sponsors')
Tenancy_Period = _int_slider('Tenancy_Period')
Plotsize = _select_from_column('Plotsize')
Age = _int_slider('Age')
Plan = _select_from_column('Plan')
Housing = _select_from_column('Housing')
Credits = _float_slider('Credits')
Post = _select_from_column('Post')
Authorities = _float_slider('Authorities')
Phone = _select_from_column('Phone')
# Expatriate offers a fixed True/False choice instead of the dataset values.
Expatriate = st.selectbox('Expatriate', [True, False])
InstallmentCredit = _float_slider('InstallmentCredit')
Yearly_Period = _float_slider('Yearly_Period')
# Run the prediction only when the user presses the button, then show it.
if st.button('Predict Cluster'):
    cluster = cluster_pred(
        Account1=Account1,
        Monthly_Period=Monthly_Period,
        History=History,
        Motive=Motive,
        Credit1=Credit1,
        Account2=Account2,
        Employment_Period=Employment_Period,
        InstallmentRate=InstallmentRate,
        Gender_Type=Gender_Type,
        Sponsors=Sponsors,
        Tenancy_Period=Tenancy_Period,
        Plotsize=Plotsize,
        Age=Age,
        Plan=Plan,
        Housing=Housing,
        Credits=Credits,
        Post=Post,
        Authorities=Authorities,
        Phone=Phone,
        Expatriate=Expatriate,
        InstallmentCredit=InstallmentCredit,
        Yearly_Period=Yearly_Period,
    )
    result_message = f'The predicted cluster is: {cluster}'
    st.write(result_message)