Fralet committed on
Commit
b96c15e
·
verified ·
1 Parent(s): 6946054

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +19 -29
app.py CHANGED
@@ -72,28 +72,13 @@ import pandas as pd
72
  import seaborn as sns
73
  import matplotlib.pyplot as plt
74
  from sklearn.metrics import accuracy_score, precision_recall_fscore_support, confusion_matrix
 
75
 
76
 
77
  # Load data
78
  data = pd.read_excel("ResponseOpenPredicted.xlsx")
79
  st.title("Resume-based Personality Prediction by Serikov Ayanbek")
80
 
81
- enneagram_types = {
82
- "Peacemaker": "Peacemaker",
83
- "Loyalist": "Loyalist",
84
- "Achiever": "Achiever",
85
- "Reformer": "Reformer",
86
- "Individualist": "Individualist",
87
- "Helper": "Helper",
88
- "Challenger": "Challenger",
89
- "Investigator": "Investigator",
90
- "Enthusiast": "Enthusiast"
91
- }
92
-
93
- # Replace numeric or generic labels with descriptive Enneagram types
94
- data['Predicted_F'] = data['Predicted_F'].map(enneagram_types)
95
- data['Predicted_M'] = data['Predicted_M'].map(enneagram_types)
96
-
97
  # Function to calculate metrics
98
  def calculate_metrics(true_labels, predicted_labels):
99
  accuracy = accuracy_score(true_labels, predicted_labels)
@@ -104,10 +89,18 @@ def calculate_metrics(true_labels, predicted_labels):
104
  accuracy_f, precision_f, recall_f, f1_score_f = calculate_metrics(data['True_label'], data['Predicted_F'])
105
  accuracy_m, precision_m, recall_m, f1_score_m = calculate_metrics(data['True_label'], data['Predicted_M'])
106
 
 
 
 
 
 
 
107
  # Plotting function for confusion matrices
108
- def plot_confusion_matrix(conf_matrix, title):
 
109
  fig, ax = plt.subplots()
110
- sns.heatmap(conf_matrix, annot=True, fmt="d", cmap="Blues", ax=ax)
 
111
  plt.title(title)
112
  plt.xlabel('Predicted Labels')
113
  plt.ylabel('True Labels')
@@ -116,14 +109,13 @@ def plot_confusion_matrix(conf_matrix, title):
116
  # Plotting function for distribution of predictions
117
  def plot_predictions_distribution(data, column, title):
118
  fig, ax = plt.subplots()
119
- sns.countplot(x=column, data=data, palette="viridis", ax=ax)
120
  plt.title(title)
121
  plt.xlabel('Predicted Labels')
122
  plt.ylabel('Count')
123
- plt.xticks(rotation=45) # Rotate labels for better readability
124
- ax.xaxis.label.set_size(12)
125
- ax.yaxis.label.set_size(12)
126
- plt.subplots_adjust(bottom=0.15) # Adjust spacing to accommodate label rotation
127
  st.pyplot(fig)
128
 
129
  # Streamlit app structure
@@ -140,13 +132,11 @@ st.write(f"Recall for Predicted_M: {recall_m:.2%}")
140
  st.write(f"F1-Score for Predicted_M: {f1_score_m:.2%}")
141
 
142
  st.subheader('Confusion Matrices')
143
- conf_matrix_f = confusion_matrix(data['True_label'], data['Predicted_F'])
144
- conf_matrix_m = confusion_matrix(data['True_label'], data['Predicted_M'])
145
- plot_confusion_matrix(conf_matrix_f, 'Confusion Matrix for Predicted_F')
146
- plot_confusion_matrix(conf_matrix_m, 'Confusion Matrix for Predicted_M')
147
 
148
  st.subheader('Distribution of Prediction Results')
149
  st.write("Distribution for Predicted_F")
150
- plot_predictions_distribution(data, 'Predicted_F', 'Distribution of Predictions for Female Demographic')
151
  st.write("Distribution for Predicted_M")
152
- plot_predictions_distribution(data, 'Predicted_M', 'Distribution of Predictions for Male Demographic')
 
72
  import seaborn as sns
73
  import matplotlib.pyplot as plt
74
  from sklearn.metrics import accuracy_score, precision_recall_fscore_support, confusion_matrix
75
+ from sklearn.preprocessing import LabelEncoder
76
 
77
 
78
  # Load data
79
  data = pd.read_excel("ResponseOpenPredicted.xlsx")
80
  st.title("Resume-based Personality Prediction by Serikov Ayanbek")
81
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
82
  # Function to calculate metrics
83
  def calculate_metrics(true_labels, predicted_labels):
84
  accuracy = accuracy_score(true_labels, predicted_labels)
 
89
  accuracy_f, precision_f, recall_f, f1_score_f = calculate_metrics(data['True_label'], data['Predicted_F'])
90
  accuracy_m, precision_m, recall_m, f1_score_m = calculate_metrics(data['True_label'], data['Predicted_M'])
91
 
92
+ # Encode labels for better visualization
93
+ le = LabelEncoder()
94
+ data['True_label_encoded'] = le.fit_transform(data['True_label'])
95
+ data['Predicted_F_encoded'] = le.transform(data['Predicted_F'])
96
+ data['Predicted_M_encoded'] = le.transform(data['Predicted_M'])
97
+
98
  # Plotting function for confusion matrices
99
+ def plot_confusion_matrix(true_labels, predicted_labels, title):
100
+ conf_matrix = confusion_matrix(true_labels, predicted_labels)
101
  fig, ax = plt.subplots()
102
+ sns.heatmap(conf_matrix, annot=True, fmt="d", cmap="Blues", ax=ax,
103
+ xticklabels=le.classes_, yticklabels=le.classes_)
104
  plt.title(title)
105
  plt.xlabel('Predicted Labels')
106
  plt.ylabel('True Labels')
 
109
  # Plotting function for distribution of predictions
110
  def plot_predictions_distribution(data, column, title):
111
  fig, ax = plt.subplots()
112
+ sns.countplot(x=column, data=data, palette="viridis")
113
  plt.title(title)
114
  plt.xlabel('Predicted Labels')
115
  plt.ylabel('Count')
116
+ plt.xticks(rotation=45)
117
+ ax.set_xticklabels(le.classes_)
118
+ plt.subplots_adjust(bottom=0.2)
 
119
  st.pyplot(fig)
120
 
121
  # Streamlit app structure
 
132
  st.write(f"F1-Score for Predicted_M: {f1_score_m:.2%}")
133
 
134
  st.subheader('Confusion Matrices')
135
+ plot_confusion_matrix(data['True_label_encoded'], data['Predicted_F_encoded'], 'Confusion Matrix for Predicted_F')
136
+ plot_confusion_matrix(data['True_label_encoded'], data['Predicted_M_encoded'], 'Confusion Matrix for Predicted_M')
 
 
137
 
138
  st.subheader('Distribution of Prediction Results')
139
  st.write("Distribution for Predicted_F")
140
+ plot_predictions_distribution(data, 'Predicted_F_encoded', 'Distribution of Predictions for Female Demographic')
141
  st.write("Distribution for Predicted_M")
142
+ plot_predictions_distribution(data, 'Predicted_M_encoded', 'Distribution of Predictions for Male Demographic')