Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -18,10 +18,10 @@ st.title("Resume-based Personality Prediction by Serikov Ayanbek")
|
|
18 |
resume_text = st.text_area("Enter Resume Text Here", height=300)
|
19 |
|
20 |
# Load data from Excel
|
21 |
-
data = pd.read_excel("ResponseTest.xlsx")
|
22 |
-
data_open = pd.read_excel("ResponseOpen.xlsx")
|
23 |
|
24 |
-
#
|
25 |
def preprocess_text(text):
|
26 |
text = re.sub(r'\W', ' ', str(text))
|
27 |
text = text.lower()
|
@@ -34,54 +34,31 @@ def preprocess_text(text):
|
|
34 |
tokens = [lemmatizer.lemmatize(word) for word in tokens if word not in stop_words]
|
35 |
return ' '.join(tokens)
|
36 |
|
37 |
-
#
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
#data_open['processed_text_open'] = data_open[['CV/Resume', 'Question']].agg(' '.join, axis=1).apply(preprocess_text)
|
42 |
-
data_open['processed_text_open'] = data_open[['Demo_F', 'Question']].agg(' '.join, axis=1)
|
43 |
-
data_open['processed_text_mopen'] = data_open[['Demo_M', 'Question']].agg(' '.join, axis=1)
|
44 |
|
45 |
labels = ["Peacemaker", "Loyalist", "Achiever", "Reformer", "Individualist", "Helper", "Challenger", "Investigator", "Enthusiast"]
|
46 |
-
|
47 |
-
# Prediction confidence threshold
|
48 |
confidence_threshold = st.slider("Confidence Threshold", 0.0, 1.0, 0.5)
|
49 |
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
highest_score_label = result['labels'][0] # Assumes the labels are sorted by score, highest first
|
57 |
-
return highest_score_label
|
58 |
-
|
59 |
-
# Apply predictions across all rows
|
60 |
-
data['Predicted'] = data.apply(get_predictions, axis=1)
|
61 |
-
st.dataframe(data[['True_label','MAX1','MAX2','MAX3', 'Predicted']])
|
62 |
-
|
63 |
-
if st.button("Predict Personality by Open Question F"):
|
64 |
-
def get_predictions(row):
|
65 |
-
processed_text = row['processed_text_open']
|
66 |
-
result = classifier(processed_text, labels)
|
67 |
-
highest_score_label = result['labels'][0] # Assumes the labels are sorted by score, highest first
|
68 |
-
return highest_score_label
|
69 |
-
|
70 |
-
|
71 |
-
|
72 |
-
# Apply predictions across all rows
|
73 |
-
data_open['Predicted_F'] = data_open.apply(get_predictions, axis=1)
|
74 |
-
st.dataframe(data_open[['True_label', 'Predicted_F']])
|
75 |
|
76 |
-
|
77 |
-
|
78 |
-
|
79 |
-
|
80 |
-
|
81 |
-
result = classifier(processed_text, labels)
|
82 |
-
highest_score_label = result['labels'][0] # Assumes the labels are sorted by score, highest first
|
83 |
-
return highest_score_label
|
84 |
|
85 |
-
|
86 |
-
|
87 |
-
|
|
|
|
|
|
|
|
|
|
18 |
resume_text = st.text_area("Enter Resume Text Here", height=300)
|
19 |
|
20 |
# Load the two Excel workbooks (relative paths) into DataFrames:
# `data` from ResponseTest.xlsx and `data_open` from ResponseOpen.xlsx.
data, data_open = [
    pd.read_excel(workbook)
    for workbook in ("ResponseTest.xlsx", "ResponseOpen.xlsx")
]
|
23 |
|
24 |
+
# Define preprocessing function
|
25 |
def preprocess_text(text):
|
26 |
text = re.sub(r'\W', ' ', str(text))
|
27 |
text = text.lower()
|
|
|
34 |
tokens = [lemmatizer.lemmatize(word) for word in tokens if word not in stop_words]
|
35 |
return ' '.join(tokens)
|
36 |
|
37 |
+
# Prepare the data for prediction.
# For every row the source columns are joined into one string, which is then
# normalised with preprocess_text.  Cells are passed through
# fillna('').astype(str) first because str.join raises TypeError on any
# non-string value (for example NaN from an empty Excel cell, or a numeric
# answer); for all-string input the result is unchanged.
question_columns = [f'Q{i}' for i in range(1, 37)]  # 'Q1' .. 'Q36'
data['processed_text'] = (
    data[['CV/Resume'] + question_columns]
    .fillna('')
    .astype(str)
    .agg(', '.join, axis=1)
    .apply(preprocess_text)
)
data_open['processed_text_open'] = (
    data_open[['Demo_F', 'Question']]
    .fillna('')
    .astype(str)
    .agg(' '.join, axis=1)
    .apply(preprocess_text)
)
data_open['processed_text_mopen'] = (
    data_open[['Demo_M', 'Question']]
    .fillna('')
    .astype(str)
    .agg(' '.join, axis=1)
    .apply(preprocess_text)
)
|
|
|
|
|
|
|
41 |
|
42 |
# Candidate labels handed to the classifier; order is kept as written.
labels = [
    "Peacemaker",
    "Loyalist",
    "Achiever",
    "Reformer",
    "Individualist",
    "Helper",
    "Challenger",
    "Investigator",
    "Enthusiast",
]
|
|
|
|
|
43 |
# Slider widget labelled "Confidence Threshold".  Per the st.slider signature
# the positional arguments after the label are the minimum (0.0), the maximum
# (1.0) and the initial value (0.5).
confidence_threshold = st.slider("Confidence Threshold", 0.0, 1.0, 0.5)
|
44 |
|
45 |
+
# Automatic prediction on resume text input.
# Runs whenever the text area holds any text; no button press is involved.
if resume_text:
    processed_resume = preprocess_text(resume_text)
    resume_prediction = classifier(processed_resume, labels)
    # The first label is reported as the prediction (assumes the classifier
    # returns its labels ordered by score, highest first).
    highest_score_label = resume_prediction['labels'][0]
    st.write("Predicted Personality for the given resume:", highest_score_label)

    # Put the "Confidence Threshold" slider to use: it is not read anywhere
    # else in the visible code, so a weak top prediction used to look exactly
    # like a strong one.  The prediction itself is still shown as before.
    # NOTE(review): assumes the result has a 'scores' list parallel to
    # 'labels', as a Hugging Face zero-shot pipeline returns — confirm against
    # how `classifier` is constructed.  If no scores are present the warning
    # is simply skipped.
    resume_scores = resume_prediction.get('scores')
    if resume_scores and resume_scores[0] < confidence_threshold:
        st.warning(
            f"Low confidence: top score {resume_scores[0]:.2f} is below "
            f"the selected threshold {confidence_threshold:.2f}."
        )
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
51 |
|
52 |
+
# Automatic prediction for each row in DataFrame.
# The 'Predicted' column is assigned in one step instead of cell by cell via
# data.at[...] inside iterrows().  That way the column exists even when the
# sheet has no rows (so the column selection below cannot raise KeyError) and
# the result does not depend on the index labels being unique.
data['Predicted'] = [
    # First label = top prediction (assumes labels come back sorted by score).
    classifier(text, labels)['labels'][0]
    for text in data['processed_text']
]
st.dataframe(data[['True_label', 'Predicted']])
|
|
|
|
|
|
|
57 |
|
58 |
+
# Separate predictions for Female and Male questions.
# Each prediction column is built from its own processed-text column and
# assigned in one step instead of cell by cell via data_open.at[...] inside
# iterrows().  That way both columns exist even when the sheet has no rows (so
# the column selection below cannot raise KeyError) and the result does not
# depend on the index labels being unique.
for source_column, target_column in (
    ('processed_text_open', 'Predicted_F'),
    ('processed_text_mopen', 'Predicted_M'),
):
    data_open[target_column] = [
        # First label = top prediction (assumes labels come back sorted by score).
        classifier(text, labels)['labels'][0]
        for text in data_open[source_column]
    ]
st.dataframe(data_open[['True_label', 'Predicted_F', 'Predicted_M']])
|