File size: 3,014 Bytes
f60bb2a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
# Import necessary libraries for streamlit and model loading
import streamlit as st
import pandas as pd
import joblib
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import roc_curve, auc, confusion_matrix, classification_report, accuracy_score
import matplotlib.pyplot as plt
import seaborn as sns

# Load the saved model from Hugging Face Space
@st.cache_resource
def load_model():
    """Load and cache the pickled stacking classifier shipped with the app.

    Returns:
        The unpickled scikit-learn estimator loaded from
        'best_stacking_model.pkl' in the Space root directory.
    """
    # st.cache_resource (Streamlit >= 1.18) replaces the deprecated
    # st.cache(allow_output_mutation=True) idiom for caching unserializable
    # resources such as fitted models; the legacy API was removed entirely
    # in recent Streamlit releases.
    model_path = 'best_stacking_model.pkl'  # Assuming you uploaded the model to the space root directory
    model = joblib.load(model_path)
    return model

# Function to process uploaded CSV
def process_csv(uploaded_file):
    """Parse an uploaded CSV file-like object into a DataFrame.

    Args:
        uploaded_file: A file-like object (e.g. Streamlit's UploadedFile)
            containing CSV data.

    Returns:
        pandas.DataFrame with the parsed contents.
    """
    return pd.read_csv(uploaded_file)

# Streamlit app starts here
st.title("Conjunctivitis Model Analysis App")

# File uploader
uploaded_file = st.file_uploader("Upload your CSV file", type="csv")

if uploaded_file is not None:
    # Process uploaded file and show a preview so the user can sanity-check it.
    df = process_csv(uploaded_file)
    st.write("Data Preview:")
    st.write(df.head())

    # Split into features and target; the uploaded file must carry the
    # ground-truth labels in a 'Target_goal' column for evaluation.
    if 'Target_goal' not in df.columns:
        st.error("Error: The uploaded CSV does not contain a 'Target_goal' column.")
    else:
        X = df.drop(columns=['Target_goal'])
        y = df['Target_goal']

        # Standardize the data.
        # NOTE(review): fitting a fresh StandardScaler on the evaluation data
        # does not reproduce the scaling the model saw at training time and
        # leaks evaluation statistics. Ideally the scaler fitted during
        # training would be pickled alongside the model and reused here —
        # TODO confirm with the model owner.
        scaler = StandardScaler()
        X_scaled = scaler.fit_transform(X)

        # Load the (cached) model.
        model = load_model()

        # Hard predictions plus positive-class probabilities (column 1 of
        # predict_proba, assumed to be the positive label) for the ROC curve.
        y_pred = model.predict(X_scaled)
        y_pred_proba = model.predict_proba(X_scaled)[:, 1]

        # Headline metrics.
        acc = accuracy_score(y, y_pred)
        fpr, tpr, _ = roc_curve(y, y_pred_proba)
        roc_auc = auc(fpr, tpr)

        st.write(f"Accuracy: {acc:.2f}")
        st.write(f"AUC: {roc_auc:.2f}")

        # Per-class precision/recall/F1 rendered as a table.
        st.write("Classification Report:")
        report = classification_report(y, y_pred, output_dict=True)
        st.write(pd.DataFrame(report).transpose())

        # Confusion matrix on a dedicated figure: drawing onto plt.gcf()
        # would reuse whatever figure matplotlib considers "current", so
        # Streamlit reruns of this script would stack heatmaps (and their
        # colorbars) onto the same axes.
        st.write("Confusion Matrix:")
        conf_matrix = confusion_matrix(y, y_pred)
        fig_cm, ax_cm = plt.subplots()
        sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues', ax=ax_cm)
        ax_cm.set_title('Confusion Matrix')
        ax_cm.set_xlabel('Predicted')
        ax_cm.set_ylabel('Actual')
        st.pyplot(fig_cm)
        plt.close(fig_cm)  # release the figure so reruns don't leak memory

        # ROC curve on its own figure as well.
        st.write("ROC Curve:")
        fig_roc, ax_roc = plt.subplots(figsize=(10, 6))
        ax_roc.plot(fpr, tpr, color='blue', lw=2, label='ROC Curve (AUC = %0.2f)' % roc_auc)
        ax_roc.plot([0, 1], [0, 1], color='gray', lw=1, linestyle='--')
        ax_roc.set_xlim(0.0, 1.0)
        ax_roc.set_ylim(0.0, 1.05)
        ax_roc.set_xlabel('False Positive Rate')
        ax_roc.set_ylabel('True Positive Rate')
        ax_roc.set_title('ROC Curve')
        ax_roc.legend(loc="lower right")
        st.pyplot(fig_roc)
        plt.close(fig_roc)

# Option to download the saved model.
# NOTE(review): a relative markdown link is not served by Streamlit, so this
# link likely 404s in a deployed Space; st.download_button over the file's
# bytes is the supported mechanism — confirm before changing user-facing UI.
st.write("[Download best model](best_stacking_model.pkl)")