import streamlit as st
import pandas as pd
from transformers import pipeline
import re
import nltk
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
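
# To run this app locally (assuming the file is saved as app.py): streamlit run app.py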

# Download necessary NLTK resources
nltk.download('stopwords')
nltk.download('wordnet')

# Initialize the zero-shot classification pipeline; st.cache_resource keeps the
# model in memory across Streamlit reruns instead of reloading it on every interaction
@st.cache_resource
def load_classifier():
    return pipeline("zero-shot-classification", model="Fralet/personality")

classifier = load_classifier()

# Define the default candidate labels (modifiable if different labels are needed)
default_labels = ["Peacemaker", "Loyalist", "Achiever", "Reformer", "Individualist", "Helper", "Challenger", "Investigator", "Enthusiast"]
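# (These defaults are the nine Enneagram personality type names.)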

# Streamlit interface setup
st.title("Resume-based Personality Prediction by Serikov Ayanbek")

# Load data from Excel
data = pd.read_excel("your_excel_file.xlsx")  # Adjust file path/name as necessary
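# The spreadsheet is expected to provide a 'CV/Resume' column, Q1..Q36 answer
# columns, and the 'True_label', 'Predicted', and 'MAX1'-'MAX3' columns used below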

# Preprocess text function
def preprocess_text(text):
    text = re.sub(r'\W', ' ', str(text))      # replace non-word characters with spaces
    text = text.lower()
    text = re.sub(r'\s+[a-z]\s+', ' ', text)  # drop single-letter tokens
    text = re.sub(r'^[a-z]\s+', ' ', text)    # drop a single letter at the start
    text = re.sub(r'\s+', ' ', text)          # collapse repeated whitespace
    stop_words = set(stopwords.words('english'))
    lemmatizer = WordNetLemmatizer()
    tokens = text.split()
    tokens = [lemmatizer.lemmatize(word) for word in tokens if word not in stop_words]
    return ' '.join(tokens)
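
# Illustrative example of the cleaning above (not executed by the app):
#   preprocess_text("I am a highly-motivated Developer!")  ->  "highly motivated developer"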

# Combine the resume with the Q1..Q36 answer columns into one string per row;
# astype(str) guards against non-string cells, which would break ' '.join
text_columns = ['CV/Resume'] + [f'Q{i}' for i in range(1, 37)]
data['combined_text'] = data[text_columns].astype(str).agg(' '.join, axis=1)
data['processed_text'] = data['combined_text'].apply(preprocess_text)

# Streamlit user inputs
user_labels = st.text_input("Enter custom labels separated by commas (optional)")
labels = [label.strip() for label in user_labels.split(',')] if user_labels else default_labels
confidence_threshold = st.slider("Confidence Threshold", 0.0, 1.0, 0.5)

if st.button("Predict Personality"):
    # Predict personality from processed text
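    # Each pipeline call returns a dict with 'sequence', 'labels', and 'scores',
    # with labels sorted by score in descending order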
    data['predictions'] = data['processed_text'].apply(lambda x: classifier(x, candidate_labels=labels))
    
    # Keep only the labels whose score meets the confidence threshold
    data['predicted_labels'] = data['predictions'].apply(
        lambda x: {label: f"{score*100:.2f}%" for label, score in zip(x['labels'], x['scores']) if score >= confidence_threshold}
    )
    # Gather the MAX1-MAX3 columns from the spreadsheet for side-by-side comparison
    data['MAX_labels'] = data.apply(lambda x: [x['MAX1'], x['MAX2'], x['MAX3']], axis=1)
    
    st.write("Predictions and Labels:")
    st.dataframe(data[['True_label', 'Predicted', 'predicted_labels', 'MAX_labels']])

# Also show the full DataFrame outside the button handler
st.dataframe(data)