Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -72,28 +72,13 @@ import pandas as pd
|
|
72 |
import seaborn as sns
|
73 |
import matplotlib.pyplot as plt
|
74 |
from sklearn.metrics import accuracy_score, precision_recall_fscore_support, confusion_matrix
|
|
|
75 |
|
76 |
|
77 |
# Load data
|
78 |
data = pd.read_excel("ResponseOpenPredicted.xlsx")
|
79 |
st.title("Resume-based Personality Prediction by Serikov Ayanbek")
|
80 |
|
81 |
-
enneagram_types = {
|
82 |
-
"Peacemaker": "Peacemaker",
|
83 |
-
"Loyalist": "Loyalist",
|
84 |
-
"Achiever": "Achiever",
|
85 |
-
"Reformer": "Reformer",
|
86 |
-
"Individualist": "Individualist",
|
87 |
-
"Helper": "Helper",
|
88 |
-
"Challenger": "Challenger",
|
89 |
-
"Investigator": "Investigator",
|
90 |
-
"Enthusiast": "Enthusiast"
|
91 |
-
}
|
92 |
-
|
93 |
-
# Replace numeric or generic labels with descriptive Enneagram types
|
94 |
-
data['Predicted_F'] = data['Predicted_F'].map(enneagram_types)
|
95 |
-
data['Predicted_M'] = data['Predicted_M'].map(enneagram_types)
|
96 |
-
|
97 |
# Function to calculate metrics
|
98 |
def calculate_metrics(true_labels, predicted_labels):
|
99 |
accuracy = accuracy_score(true_labels, predicted_labels)
|
@@ -104,10 +89,18 @@ def calculate_metrics(true_labels, predicted_labels):
|
|
104 |
accuracy_f, precision_f, recall_f, f1_score_f = calculate_metrics(data['True_label'], data['Predicted_F'])
|
105 |
accuracy_m, precision_m, recall_m, f1_score_m = calculate_metrics(data['True_label'], data['Predicted_M'])
|
106 |
|
|
|
|
|
|
|
|
|
|
|
|
|
107 |
# Plotting function for confusion matrices
|
108 |
-
def plot_confusion_matrix(
|
|
|
109 |
fig, ax = plt.subplots()
|
110 |
-
sns.heatmap(conf_matrix, annot=True, fmt="d", cmap="Blues", ax=ax
|
|
|
111 |
plt.title(title)
|
112 |
plt.xlabel('Predicted Labels')
|
113 |
plt.ylabel('True Labels')
|
@@ -116,14 +109,13 @@ def plot_confusion_matrix(conf_matrix, title):
|
|
116 |
# Plotting function for distribution of predictions
|
117 |
def plot_predictions_distribution(data, column, title):
|
118 |
fig, ax = plt.subplots()
|
119 |
-
sns.countplot(x=column, data=data, palette="viridis"
|
120 |
plt.title(title)
|
121 |
plt.xlabel('Predicted Labels')
|
122 |
plt.ylabel('Count')
|
123 |
-
plt.xticks(rotation=45)
|
124 |
-
ax.
|
125 |
-
|
126 |
-
plt.subplots_adjust(bottom=0.15) # Adjust spacing to accommodate label rotation
|
127 |
st.pyplot(fig)
|
128 |
|
129 |
# Streamlit app structure
|
@@ -140,13 +132,11 @@ st.write(f"Recall for Predicted_M: {recall_m:.2%}")
|
|
140 |
st.write(f"F1-Score for Predicted_M: {f1_score_m:.2%}")
|
141 |
|
142 |
st.subheader('Confusion Matrices')
|
143 |
-
|
144 |
-
|
145 |
-
plot_confusion_matrix(conf_matrix_f, 'Confusion Matrix for Predicted_F')
|
146 |
-
plot_confusion_matrix(conf_matrix_m, 'Confusion Matrix for Predicted_M')
|
147 |
|
148 |
st.subheader('Distribution of Prediction Results')
|
149 |
st.write("Distribution for Predicted_F")
|
150 |
-
plot_predictions_distribution(data, '
|
151 |
st.write("Distribution for Predicted_M")
|
152 |
-
plot_predictions_distribution(data, '
|
|
|
72 |
import seaborn as sns
|
73 |
import matplotlib.pyplot as plt
|
74 |
from sklearn.metrics import accuracy_score, precision_recall_fscore_support, confusion_matrix
|
75 |
+
from sklearn.preprocessing import LabelEncoder
|
76 |
|
77 |
|
78 |
# Load data
|
79 |
data = pd.read_excel("ResponseOpenPredicted.xlsx")
|
80 |
st.title("Resume-based Personality Prediction by Serikov Ayanbek")
|
81 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
82 |
# Function to calculate metrics
|
83 |
def calculate_metrics(true_labels, predicted_labels):
|
84 |
accuracy = accuracy_score(true_labels, predicted_labels)
|
|
|
89 |
accuracy_f, precision_f, recall_f, f1_score_f = calculate_metrics(data['True_label'], data['Predicted_F'])
|
90 |
accuracy_m, precision_m, recall_m, f1_score_m = calculate_metrics(data['True_label'], data['Predicted_M'])
|
91 |
|
92 |
+
# Encode labels for better visualization
|
93 |
+
le = LabelEncoder()
|
94 |
+
data['True_label_encoded'] = le.fit_transform(data['True_label'])
|
95 |
+
data['Predicted_F_encoded'] = le.transform(data['Predicted_F'])
|
96 |
+
data['Predicted_M_encoded'] = le.transform(data['Predicted_M'])
|
97 |
+
|
98 |
# Plotting function for confusion matrices
|
99 |
+
def plot_confusion_matrix(true_labels, predicted_labels, title):
|
100 |
+
conf_matrix = confusion_matrix(true_labels, predicted_labels)
|
101 |
fig, ax = plt.subplots()
|
102 |
+
sns.heatmap(conf_matrix, annot=True, fmt="d", cmap="Blues", ax=ax,
|
103 |
+
xticklabels=le.classes_, yticklabels=le.classes_)
|
104 |
plt.title(title)
|
105 |
plt.xlabel('Predicted Labels')
|
106 |
plt.ylabel('True Labels')
|
|
|
109 |
# Plotting function for distribution of predictions
|
110 |
def plot_predictions_distribution(data, column, title):
    """Render a bar chart of prediction counts for one encoded label column.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame holding the encoded prediction column.
    column : str
        Name of the integer-encoded label column to count (e.g. 'Predicted_F_encoded').
    title : str
        Plot title shown above the chart.

    Displays the figure in the Streamlit app via st.pyplot().
    Relies on the module-level LabelEncoder `le` to map encoded values
    back to readable class names.
    """
    fig, ax = plt.subplots()
    sns.countplot(x=column, data=data, palette="viridis")
    plt.title(title)
    plt.xlabel('Predicted Labels')
    plt.ylabel('Count')
    plt.xticks(rotation=45)
    # BUG FIX: the previous code set xticklabels to ALL of le.classes_, but
    # countplot only draws bars for categories present in `column` (sorted
    # numeric order) — so any absent class shifted every later label onto the
    # wrong bar. Label only the encoded values that actually appear.
    present = sorted(data[column].unique())
    ax.set_xticklabels(le.inverse_transform(present))
    plt.subplots_adjust(bottom=0.2)  # leave room for the rotated labels
    st.pyplot(fig)
|
120 |
|
121 |
# Streamlit app structure
|
|
|
132 |
st.write(f"F1-Score for Predicted_M: {f1_score_m:.2%}")
|
133 |
|
134 |
# Render a confusion matrix for each gender-specific prediction column.
st.subheader('Confusion Matrices')
for encoded_col, matrix_title in (
    ('Predicted_F_encoded', 'Confusion Matrix for Predicted_F'),
    ('Predicted_M_encoded', 'Confusion Matrix for Predicted_M'),
):
    plot_confusion_matrix(data['True_label_encoded'], data[encoded_col], matrix_title)

# Render bar charts showing how often each personality type was predicted.
st.subheader('Distribution of Prediction Results')
for caption, encoded_col, chart_title in (
    ("Distribution for Predicted_F", 'Predicted_F_encoded',
     'Distribution of Predictions for Female Demographic'),
    ("Distribution for Predicted_M", 'Predicted_M_encoded',
     'Distribution of Predictions for Male Demographic'),
):
    st.write(caption)
    plot_predictions_distribution(data, encoded_col, chart_title)