Spaces:
Sleeping
Sleeping
import streamlit as st | |
import pandas as pd | |
from sklearn.model_selection import train_test_split | |
from sklearn.ensemble import RandomForestClassifier | |
from sklearn.tree import DecisionTreeClassifier | |
from sklearn.neighbors import KNeighborsClassifier | |
from sklearn.svm import SVC | |
from sklearn.linear_model import LogisticRegression | |
from sklearn.metrics import accuracy_score | |
def build_and_evaluate_model(model, X_train, X_test, y_train, y_test):
    """Fit ``model`` on the training split and score it on the test split.

    Both the predictions and the true labels are cast to ``str`` before
    scoring so the two sides are compared as the same data type.

    Args:
        model: Estimator object exposing ``fit(X, y)`` and ``predict(X)``.
        X_train: Training features.
        X_test: Held-out features to predict on.
        y_train: Training labels.
        y_test: Held-out labels; must support ``.astype(str)``.

    Returns:
        The value returned by ``accuracy_score`` on success. If any step
        raises, the string ``"Not Applicable: <error message>"`` instead of
        propagating the exception.
    """
    try:
        model.fit(X_train, y_train)
        predicted_labels = model.predict(X_test).astype(str)
        true_labels = y_test.astype(str)
        return accuracy_score(true_labels, predicted_labels)
    except Exception as error:
        # Best-effort: report the failure to the caller as text.
        return f"Not Applicable: {str(error)}"
def main():
    """Render the Streamlit page: upload a CSV, pick columns, compare models.

    Flow:
      1. Inject custom CSS and draw the title and tagline.
      2. Let the user choose which classifiers to compare (sidebar).
      3. After a CSV is uploaded, show a preview and let the user pick the
         feature columns and the target column.
      4. On "Run Models", one-hot encode the object-typed feature columns,
         split 80/20, evaluate every selected model through
         ``build_and_evaluate_model`` and list accuracies, best first.

    Models that could not be evaluated (``build_and_evaluate_model`` returns
    a ``str`` message for those) are listed after the ranked scores instead
    of being sorted and formatted as numbers, which would raise.
    """
    # NOTE: the colour names in the inline CSS comments below do not match
    # the hex values (#FF0A01 is a red, #2c3e50 a dark slate blue). The hex
    # values are what the browser applies; the string is left unchanged.
    custom_css = """
    <style>
    .title-text {
    font-size: 55px;
    font-weight: bold;
    color: #FF0A01; /* Dark Blue */
    text-align: center;
    margin-bottom: 0px;
    }
    .tagline-text {
    font-size: 18px;
    font-style: italic;
    color: #2c3e50; /* White */
    text-align: center;
    margin-top:-10px;
    margin-bottom: 20px;
    }
    </style>
    """
    st.markdown(custom_css, unsafe_allow_html=True)

    # Title and tagline
    st.markdown("<p class='title-text'>ModelMetrics Hub</p>", unsafe_allow_html=True)
    st.markdown("<p class='tagline-text'>Optimal Model Explorer</p>", unsafe_allow_html=True)

    st.sidebar.title("Model Selection")
    model_names = ["Random Forest", "Decision Tree", "KNN", "SVM", "Logistic Regression"]
    selected_models = st.sidebar.multiselect(
        "Choose models to compare", model_names, default=model_names
    )

    uploaded_file = st.file_uploader("Upload a CSV file", type=["csv"])
    if uploaded_file is None:
        return

    try:
        # latin-1 is used so files with non-UTF-8 bytes still load.
        data = pd.read_csv(uploaded_file, encoding='latin-1')
    except (pd.errors.EmptyDataError, pd.errors.ParserError) as error:
        st.error(f"Could not read the CSV file: {error}")
        return

    st.write("Preview of the dataset:")
    st.write(data.head())

    all_columns = data.columns.tolist()
    feature_columns = st.multiselect("Select feature columns", all_columns)
    target_column = st.selectbox("Select target column", all_columns)

    if not st.button("Run Models"):
        return

    # Validate the selections before doing any work.
    if not feature_columns:
        st.warning("Please select at least one feature column.")
        return
    if target_column in feature_columns:
        # Training on the label itself would make every accuracy meaningless.
        st.error("The target column cannot also be selected as a feature column.")
        return
    if not selected_models:
        st.warning("Please select at least one model in the sidebar.")
        return

    X = data[feature_columns]
    # Labels are compared as strings by build_and_evaluate_model.
    y = data[target_column].astype(str)

    # Perform one-hot encoding for categorical columns
    categorical_cols = X.select_dtypes(include=['object']).columns.tolist()
    X = pd.get_dummies(X, columns=categorical_cols, drop_first=True)

    try:
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=0.2, random_state=42
        )
    except ValueError as error:
        # Raised e.g. when the dataset has too few rows to split.
        st.error(f"Could not split the dataset: {error}")
        return

    models = {
        "Random Forest": RandomForestClassifier(n_estimators=100, random_state=42),
        "Decision Tree": DecisionTreeClassifier(random_state=42),
        "KNN": KNeighborsClassifier(n_neighbors=5),
        "SVM": SVC(kernel='rbf', random_state=42),
        "Logistic Regression": LogisticRegression(max_iter=1000, random_state=42),
    }

    scores = {}    # model name -> numeric accuracy
    failures = {}  # model name -> "Not Applicable: ..." message
    for model_name in selected_models:
        outcome = build_and_evaluate_model(
            models[model_name], X_train, X_test, y_train, y_test
        )
        if isinstance(outcome, str):
            failures[model_name] = outcome
        else:
            scores[model_name] = outcome

    st.subheader("Accuracy")
    # Rank only the numeric scores; mixing in the failure strings would make
    # sorted() raise TypeError and the percentage format raise ValueError.
    ranked = sorted(scores.items(), key=lambda item: item[1], reverse=True)
    for model_name, accuracy in ranked:
        st.write(f"{model_name}: {accuracy * 100:.2f}%")
    for model_name, message in failures.items():
        st.write(f"{model_name}: {message}")
# Entry point: build the page only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()