"""Streamlit app that assigns a user-entered record to a KMeans cluster.

The script loads ``dataset.csv``, normalises its column names and dtypes,
fits a preprocessing pipeline on it, and exposes a form whose values are
transformed with that pipeline and passed to the model stored in
``Veri.pkl``.
"""

import joblib
import pandas as pd
import streamlit as st
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder, StandardScaler

# Columns coerced to integers / floats after loading.
INT_COLUMNS = [
    'Monthly_Period', 'Credit1', 'InstallmentRate', 'Tenancy_Period',
    'Age', 'Credits', 'Authorities',
]
FLOAT_COLUMNS = ['InstallmentCredit', 'Yearly_Period']

# Feature groups fed to the ColumnTransformer (order matters: it fixes the
# column order of the transformed matrix).
NUMERIC_COLUMNS = INT_COLUMNS + FLOAT_COLUMNS
CATEGORICAL_COLUMNS = [
    "Account1", "History", "Motive", "Account2", "Employment_Period",
    "Gender_Type", "Sponsors", "Plotsize", "Plan", "Housing", "Post",
    "Phone", "Expatriate",
]

# Load data and update column names
df = pd.read_csv('dataset.csv')
df = df.drop(['id', 'Class'], axis=1)
df.columns = df.columns.str.replace(r'[\s\.]', '_', regex=True)
df.columns = df.columns.str.replace(r'Gender&Type', 'Gender_Type', regex=True)
df.columns = df.columns.str.replace(r'#', '', regex=True)

# Unparseable numeric cells become 0 rather than NaN.
df[INT_COLUMNS] = (
    df[INT_COLUMNS].apply(pd.to_numeric, errors='coerce').fillna(0).astype(int)
)
df[FLOAT_COLUMNS] = (
    df[FLOAT_COLUMNS].apply(pd.to_numeric, errors='coerce').fillna(0).astype(float)
)

# Select dependent and independent variables
x = df

# Preprocessing (StandardScaler for numeric, one-hot for categorical)
preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), NUMERIC_COLUMNS),
        ('cat', OneHotEncoder(), CATEGORICAL_COLUMNS),
    ]
)
# Fit ONCE on the full dataset so that a single user row is scaled with the
# dataset's statistics and encoded with the dataset's category set.
# NOTE(review): this assumes 'Veri.pkl' was trained on the output of this same
# preprocessor fitted on dataset.csv -- confirm against the training script.
preprocessor.fit(x)


# Streamlit application
def cluster_pred(Account1, Monthly_Period, History, Motive, Credit1, Account2,
                 Employment_Period, InstallmentRate, Gender_Type, Sponsors,
                 Tenancy_Period, Plotsize, Age, Plan, Housing, Credits, Post,
                 Authorities, Phone, Expatriate, InstallmentCredit,
                 Yearly_Period):
    """Return the cluster predicted for one record.

    Each argument is the value of the dataset column of the same name. The
    values are assembled into a one-row DataFrame, transformed with the
    module-level ``preprocessor`` (already fitted on the dataset), and passed
    to the model loaded from ``Veri.pkl``.

    Returns:
        The first element of ``model.predict(...)`` converted to ``float``.

    Raises:
        Whatever ``joblib.load`` raises if ``Veri.pkl`` cannot be read, and
        whatever the fitted transformers raise for values they cannot
        transform (e.g. a category the encoder did not see while fitting).
    """
    input_data = pd.DataFrame({
        'Account1': [Account1],
        'Monthly_Period': [Monthly_Period],
        'History': [History],
        'Motive': [Motive],
        'Credit1': [Credit1],
        'Account2': [Account2],
        'Employment_Period': [Employment_Period],
        'InstallmentRate': [InstallmentRate],
        'Gender_Type': [Gender_Type],
        'Sponsors': [Sponsors],
        'Tenancy_Period': [Tenancy_Period],
        'Plotsize': [Plotsize],
        'Age': [Age],
        'Plan': [Plan],
        'Housing': [Housing],
        'Credits': [Credits],
        'Post': [Post],
        'Authorities': [Authorities],
        'Phone': [Phone],
        'Expatriate': [Expatriate],
        'InstallmentCredit': [InstallmentCredit],
        'Yearly_Period': [Yearly_Period],
    })

    # SECURITY: joblib.load unpickles the file; only load a trusted Veri.pkl.
    model = joblib.load('Veri.pkl')

    # transform(), not fit_transform(): refitting on a single row would scale
    # every numeric feature to 0 and produce one one-hot column per
    # categorical, which cannot match the model's training features.
    features = preprocessor.transform(input_data)
    prediction = model.predict(features)
    return float(prediction[0])


def _choice(column):
    """Render a select box offering the distinct dataset values of *column*."""
    return st.selectbox(column, df[column].unique())


def _int_slider(column):
    """Render an integer slider spanning the dataset range of *column*."""
    return st.slider(column, int(df[column].min()), int(df[column].max()))


def _float_slider(column):
    """Render a float slider spanning the dataset range of *column*."""
    return st.slider(column, float(df[column].min()), float(df[column].max()))


st.title("KMeans Clustering Model")
st.write("Enter Input Data to Predict Cluster")

Account1 = _choice('Account1')
Monthly_Period = _int_slider('Monthly_Period')
History = _choice('History')
Motive = _choice('Motive')
Credit1 = _int_slider('Credit1')
Account2 = _choice('Account2')
Employment_Period = _choice('Employment_Period')
InstallmentRate = _int_slider('InstallmentRate')
Gender_Type = _choice('Gender_Type')
Sponsors = _choice('Sponsors')
Tenancy_Period = _int_slider('Tenancy_Period')
Plotsize = _choice('Plotsize')
Age = _int_slider('Age')
Plan = _choice('Plan')
Housing = _choice('Housing')
# Credits and Authorities are coerced to int above, so use integer sliders.
Credits = _int_slider('Credits')
Post = _choice('Post')
Authorities = _int_slider('Authorities')
Phone = _choice('Phone')
# NOTE(review): presumably the 'Expatriate' column holds booleans -- verify
# against dataset.csv, otherwise the encoder will not recognise these values.
Expatriate = st.selectbox('Expatriate', [True, False])
InstallmentCredit = _float_slider('InstallmentCredit')
Yearly_Period = _float_slider('Yearly_Period')

if st.button('Predict Cluster'):
    cluster = cluster_pred(
        Account1, Monthly_Period, History, Motive, Credit1, Account2,
        Employment_Period, InstallmentRate, Gender_Type, Sponsors,
        Tenancy_Period, Plotsize, Age, Plan, Housing, Credits, Post,
        Authorities, Phone, Expatriate, InstallmentCredit, Yearly_Period,
    )
    st.write(f'The predicted cluster is: {cluster}')