# ModelMetricsHub / app.py
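"""Streamlit app that trains several scikit-learn classifiers on an uploaded
CSV file and ranks them by test-set accuracy."""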
import streamlit as st
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
# Function to build and evaluate models
def build_and_evaluate_model(model, X_train, X_test, y_train, y_test):
    try:
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)
        # Cast predictions and targets to strings so accuracy_score compares
        # labels of the same dtype, however pandas parsed the target column
        y_pred = y_pred.astype(str)
        accuracy = accuracy_score(y_test.astype(str), y_pred)
        return accuracy
    except Exception as e:
        return f"Not Applicable: {str(e)}"
# Streamlit app
def main():
    # Custom CSS for styling the title and tagline
    custom_css = """
    <style>
    .title-text {
        font-size: 55px;
        font-weight: bold;
        color: #FF0A01; /* red */
        text-align: center;
        margin-bottom: 0px;
    }
    .tagline-text {
        font-size: 18px;
        font-style: italic;
        color: #2c3e50; /* dark slate blue */
        text-align: center;
        margin-top: -10px;
        margin-bottom: 20px;
    }
    </style>
    """
    st.markdown(custom_css, unsafe_allow_html=True)
    # Title and tagline
    st.markdown("<p class='title-text'>ModelMetrics Hub</p>", unsafe_allow_html=True)
    st.markdown("<p class='tagline-text'>Optimal Model Explorer</p>", unsafe_allow_html=True)

    # Sidebar: pick which classifiers to compare
    st.sidebar.title("Model Selection")
    model_names = ["Random Forest", "Decision Tree", "KNN", "SVM", "Logistic Regression"]
    selected_models = st.sidebar.multiselect("Choose models to compare", model_names, default=model_names)

    # Dataset upload and preview
    uploaded_file = st.file_uploader("Upload a CSV file", type=["csv"])
    if uploaded_file is not None:
        data = pd.read_csv(uploaded_file, encoding="latin-1")  # latin-1 tolerates non-UTF-8 files
        st.write("Preview of the dataset:")
        st.write(data.head())

        # Let the user choose the feature columns and the target column
        feature_columns = st.multiselect("Select feature columns", data.columns.tolist())
        target_column = st.selectbox("Select target column", data.columns.tolist())
        if st.button("Run Models"):
            X = data[feature_columns]
            y = data[target_column].astype(str)  # treat the target as categorical labels

            # One-hot encode categorical feature columns so every model can handle them
            categorical_cols = X.select_dtypes(include=["object"]).columns.tolist()
            X = pd.get_dummies(X, columns=categorical_cols, drop_first=True)

            # Hold out 20% of the rows as a test set
            X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

            models = {
                "Random Forest": RandomForestClassifier(n_estimators=100, random_state=42),
                "Decision Tree": DecisionTreeClassifier(random_state=42),
                "KNN": KNeighborsClassifier(n_neighbors=5),
                "SVM": SVC(kernel="rbf", random_state=42),
                "Logistic Regression": LogisticRegression(max_iter=1000, random_state=42),
            }

            # Train and score each selected model
            results = {}
            for model_name in selected_models:
                results[model_name] = build_and_evaluate_model(
                    models[model_name], X_train, X_test, y_train, y_test
                )

            # Sort by accuracy, highest first; models that failed return a string,
            # so push them to the bottom instead of letting them break the sort
            sorted_results = sorted(
                results.items(),
                key=lambda item: -1.0 if isinstance(item[1], str) else float(item[1]),
                reverse=True,
            )

            st.subheader("Accuracy")
            for model_name, accuracy in sorted_results:
                if isinstance(accuracy, str):
                    st.write(f"{model_name}: {accuracy}")
                else:
                    st.write(f"{model_name}: {accuracy * 100:.2f}%")
if __name__ == "__main__":
    main()
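
# Launched with the standard Streamlit CLI: streamlit run app.py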