Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -72,28 +72,13 @@ import pandas as pd
|
|
72 |
import seaborn as sns
|
73 |
import matplotlib.pyplot as plt
|
74 |
from sklearn.metrics import accuracy_score, precision_recall_fscore_support, confusion_matrix
|
|
|
75 |
|
76 |
|
77 |
# Load data
|
78 |
data = pd.read_excel("ResponseOpenPredicted.xlsx")
|
79 |
st.title("Resume-based Personality Prediction by Serikov Ayanbek")
|
80 |
|
81 |
-
enneagram_types = {
|
82 |
-
"Peacemaker": "Peacemaker",
|
83 |
-
"Loyalist": "Loyalist",
|
84 |
-
"Achiever": "Achiever",
|
85 |
-
"Reformer": "Reformer",
|
86 |
-
"Individualist": "Individualist",
|
87 |
-
"Helper": "Helper",
|
88 |
-
"Challenger": "Challenger",
|
89 |
-
"Investigator": "Investigator",
|
90 |
-
"Enthusiast": "Enthusiast"
|
91 |
-
}
|
92 |
-
|
93 |
-
# Replace numeric or generic labels with descriptive Enneagram types
|
94 |
-
data['Predicted_F'] = data['Predicted_F'].map(enneagram_types)
|
95 |
-
data['Predicted_M'] = data['Predicted_M'].map(enneagram_types)
|
96 |
-
|
97 |
# Function to calculate metrics
|
98 |
def calculate_metrics(true_labels, predicted_labels):
|
99 |
accuracy = accuracy_score(true_labels, predicted_labels)
|
@@ -104,10 +89,18 @@ def calculate_metrics(true_labels, predicted_labels):
|
|
104 |
accuracy_f, precision_f, recall_f, f1_score_f = calculate_metrics(data['True_label'], data['Predicted_F'])
|
105 |
accuracy_m, precision_m, recall_m, f1_score_m = calculate_metrics(data['True_label'], data['Predicted_M'])
|
106 |
|
|
|
|
|
|
|
|
|
|
|
|
|
107 |
# Plotting function for confusion matrices
|
108 |
-
def plot_confusion_matrix(
|
|
|
109 |
fig, ax = plt.subplots()
|
110 |
-
sns.heatmap(conf_matrix, annot=True, fmt="d", cmap="Blues", ax=ax
|
|
|
111 |
plt.title(title)
|
112 |
plt.xlabel('Predicted Labels')
|
113 |
plt.ylabel('True Labels')
|
@@ -116,14 +109,13 @@ def plot_confusion_matrix(conf_matrix, title):
|
|
116 |
# Plotting function for distribution of predictions
|
117 |
def plot_predictions_distribution(data, column, title):
|
118 |
fig, ax = plt.subplots()
|
119 |
-
sns.countplot(x=column, data=data, palette="viridis"
|
120 |
plt.title(title)
|
121 |
plt.xlabel('Predicted Labels')
|
122 |
plt.ylabel('Count')
|
123 |
-
plt.xticks(rotation=45)
|
124 |
-
ax.
|
125 |
-
|
126 |
-
plt.subplots_adjust(bottom=0.15) # Adjust spacing to accommodate label rotation
|
127 |
st.pyplot(fig)
|
128 |
|
129 |
# Streamlit app structure
|
@@ -140,13 +132,11 @@ st.write(f"Recall for Predicted_M: {recall_m:.2%}")
|
|
140 |
st.write(f"F1-Score for Predicted_M: {f1_score_m:.2%}")
|
141 |
|
142 |
st.subheader('Confusion Matrices')
|
143 |
-
|
144 |
-
|
145 |
-
plot_confusion_matrix(conf_matrix_f, 'Confusion Matrix for Predicted_F')
|
146 |
-
plot_confusion_matrix(conf_matrix_m, 'Confusion Matrix for Predicted_M')
|
147 |
|
148 |
st.subheader('Distribution of Prediction Results')
|
149 |
st.write("Distribution for Predicted_F")
|
150 |
-
plot_predictions_distribution(data, '
|
151 |
st.write("Distribution for Predicted_M")
|
152 |
-
plot_predictions_distribution(data, '
|
|
|
72 |
import seaborn as sns
|
73 |
import matplotlib.pyplot as plt
|
74 |
from sklearn.metrics import accuracy_score, precision_recall_fscore_support, confusion_matrix
|
75 |
+
from sklearn.preprocessing import LabelEncoder
|
76 |
|
77 |
|
78 |
# Load data
|
79 |
data = pd.read_excel("ResponseOpenPredicted.xlsx")
|
80 |
st.title("Resume-based Personality Prediction by Serikov Ayanbek")
|
81 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
82 |
# Function to calculate metrics
|
83 |
def calculate_metrics(true_labels, predicted_labels):
|
84 |
accuracy = accuracy_score(true_labels, predicted_labels)
|
|
|
89 |
accuracy_f, precision_f, recall_f, f1_score_f = calculate_metrics(data['True_label'], data['Predicted_F'])
|
90 |
accuracy_m, precision_m, recall_m, f1_score_m = calculate_metrics(data['True_label'], data['Predicted_M'])
|
91 |
|
92 |
+
# Encode labels for better visualization
|
93 |
+
le = LabelEncoder()
|
94 |
+
data['True_label_encoded'] = le.fit_transform(data['True_label'])
|
95 |
+
data['Predicted_F_encoded'] = le.transform(data['Predicted_F'])
|
96 |
+
data['Predicted_M_encoded'] = le.transform(data['Predicted_M'])
|
97 |
+
|
98 |
# Plotting function for confusion matrices
|
99 |
+
def plot_confusion_matrix(true_labels, predicted_labels, title):
|
100 |
+
conf_matrix = confusion_matrix(true_labels, predicted_labels)
|
101 |
fig, ax = plt.subplots()
|
102 |
+
sns.heatmap(conf_matrix, annot=True, fmt="d", cmap="Blues", ax=ax,
|
103 |
+
xticklabels=le.classes_, yticklabels=le.classes_)
|
104 |
plt.title(title)
|
105 |
plt.xlabel('Predicted Labels')
|
106 |
plt.ylabel('True Labels')
|
|
|
109 |
# Plotting function for distribution of predictions
|
110 |
def plot_predictions_distribution(data, column, title):
    """Render a bar chart of prediction counts for one encoded label column.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame holding the encoded prediction column.
    column : str
        Name of the integer-encoded label column to count (e.g. 'Predicted_F_encoded').
    title : str
        Plot title shown above the chart.

    Displays the figure in the Streamlit app via st.pyplot().
    Relies on the module-level LabelEncoder `le` to map encoded values
    back to readable class names.
    """
    fig, ax = plt.subplots()
    sns.countplot(x=column, data=data, palette="viridis")
    plt.title(title)
    plt.xlabel('Predicted Labels')
    plt.ylabel('Count')
    plt.xticks(rotation=45)
    # BUG FIX: the previous code set xticklabels to ALL of le.classes_, but
    # countplot only draws bars for categories present in `column` (sorted
    # numeric order) — so any absent class shifted every later label onto the
    # wrong bar. Label only the encoded values that actually appear.
    present = sorted(data[column].unique())
    ax.set_xticklabels(le.inverse_transform(present))
    plt.subplots_adjust(bottom=0.2)  # leave room for the rotated labels
    st.pyplot(fig)
|
120 |
|
121 |
# Streamlit app structure
|
|
|
132 |
st.write(f"F1-Score for Predicted_M: {f1_score_m:.2%}")
|
133 |
|
134 |
# Render a confusion matrix for each gender-specific prediction column.
st.subheader('Confusion Matrices')
for encoded_col, matrix_title in (
    ('Predicted_F_encoded', 'Confusion Matrix for Predicted_F'),
    ('Predicted_M_encoded', 'Confusion Matrix for Predicted_M'),
):
    plot_confusion_matrix(data['True_label_encoded'], data[encoded_col], matrix_title)

# Render bar charts showing how often each personality type was predicted.
st.subheader('Distribution of Prediction Results')
for caption, encoded_col, chart_title in (
    ("Distribution for Predicted_F", 'Predicted_F_encoded',
     'Distribution of Predictions for Female Demographic'),
    ("Distribution for Predicted_M", 'Predicted_M_encoded',
     'Distribution of Predictions for Male Demographic'),
):
    st.write(caption)
    plot_predictions_distribution(data, encoded_col, chart_title)