File size: 2,724 Bytes
1b50b66
895141b
324d859
2ee3ecc
c05213f
2ee3ecc
 
76aff4b
 
2ee3ecc
 
1b50b66
e27efab
7711d36
1b50b66
76aff4b
b4628ad
d605d91
76aff4b
 
bb3c6bc
 
76aff4b
bb3c6bc
76aff4b
 
 
a1a24b4
76aff4b
75a0105
76aff4b
 
 
 
 
2ee3ecc
bb3c6bc
 
 
 
d605d91
6297210
2ee3ecc
 
bb3c6bc
 
 
 
 
 
48b2405
bb3c6bc
 
 
 
 
d605d91
bb3c6bc
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
import streamlit as st
import pandas as pd
from transformers import pipeline
import re
import nltk
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer

# Download necessary NLTK resources
# NOTE(review): Streamlit re-executes this script on every widget interaction,
# so these run on each rerun. nltk.download is a no-op once the corpora are
# already present, but it still performs the presence check each time.
nltk.download('stopwords')
nltk.download('wordnet')

# Initialize the zero-shot classification pipeline.
# Streamlit re-runs this script top-to-bottom on every widget interaction;
# without caching, the large BART-MNLI model would be reloaded from disk on
# each rerun. st.cache_resource keeps a single shared instance per session
# server, and the module-level `classifier` name is preserved for all
# downstream call sites.
@st.cache_resource
def _load_classifier():
    """Load and cache the zero-shot classification pipeline."""
    return pipeline("zero-shot-classification", model="facebook/bart-large-mnli")

classifier = _load_classifier()

# Streamlit interface setup
st.title("Resume-based Personality Prediction by Serikov Ayanbek")
resume_text = st.text_area("Enter Resume Text Here", height=300)

# Load data from Excel.
# NOTE(review): these files are re-read on every rerun; wrapping the reads in
# st.cache_data would avoid that if the files are static — confirm intent.
data = pd.read_excel("ResponseTest.xlsx")
data_open = pd.read_excel("ResponseOpen.xlsx")
# Define preprocessing function.
# The stopword set and lemmatizer are built once at import time instead of on
# every call (the original rebuilt both per invocation). The required NLTK
# corpora are downloaded above, before this line executes.
_STOP_WORDS = set(stopwords.words('english'))
_LEMMATIZER = WordNetLemmatizer()

def preprocess_text(text):
    """Normalize free text for zero-shot classification.

    Steps: replace non-word characters with spaces, lowercase, drop isolated
    single letters, collapse whitespace, remove English stopwords, and
    lemmatize the remaining tokens.

    Args:
        text: Any value; coerced to ``str`` before cleaning.

    Returns:
        A single space-joined string of cleaned, lemmatized tokens.
    """
    text = re.sub(r'\W', ' ', str(text))
    text = text.lower()
    # Remove single-letter tokens left behind by the \W substitution
    # (mid-string, then at the start of the string).
    text = re.sub(r'\s+[a-z]\s+', ' ', text)
    text = re.sub(r'^[a-z]\s+', ' ', text)
    text = re.sub(r'\s+', ' ', text)
    tokens = text.split()
    tokens = [_LEMMATIZER.lemmatize(word) for word in tokens if word not in _STOP_WORDS]
    return ' '.join(tokens)

# Prepare the data for prediction.
# Cast every cell to str before joining: pd.read_excel returns NaN (a float)
# for empty cells, and str.join raises TypeError on non-string elements.
question_cols = ['CV/Resume'] + [f'Q{i}' for i in range(1, 37)]
data['processed_text'] = data[question_cols].astype(str).agg(', '.join, axis=1).apply(preprocess_text)
data_open['processed_text_open'] = data_open[['Demo_F', 'Question']].astype(str).agg(' '.join, axis=1).apply(preprocess_text)
data_open['processed_text_mopen'] = data_open[['Demo_M', 'Question']].astype(str).agg(' '.join, axis=1).apply(preprocess_text)

# Candidate personality labels for zero-shot classification.
labels = ["Peacemaker", "Loyalist", "Achiever", "Reformer", "Individualist", "Helper", "Challenger", "Investigator", "Enthusiast"]
# NOTE(review): confidence_threshold is never referenced in the visible code —
# either filter predictions below it or remove the slider.
confidence_threshold = st.slider("Confidence Threshold", 0.0, 1.0, 0.5)

# Classify the pasted resume as soon as any text is entered.
if resume_text:
    cleaned_resume = preprocess_text(resume_text)
    top_label = classifier(cleaned_resume, labels)['labels'][0]
    st.write("Predicted Personality for the given resume:", top_label)

# Classify every prepared row, then display true vs. predicted labels.
data['Predicted'] = [
    classifier(text, labels)['labels'][0]
    for text in data['processed_text']
]
st.dataframe(data[['True_label', 'Predicted']])

# Run the female and male variants of each open question through the
# classifier, then show both prediction columns next to the true label.
female_preds = []
male_preds = []
for _, record in data_open.iterrows():
    female_preds.append(classifier(record['processed_text_open'], labels)['labels'][0])
    male_preds.append(classifier(record['processed_text_mopen'], labels)['labels'][0])
data_open['Predicted_F'] = female_preds
data_open['Predicted_M'] = male_preds
st.dataframe(data_open[['True_label', 'Predicted_F', 'Predicted_M']])