Spaces:

Fralet
/

personality

Running

File size: 2,302 Bytes

1b50b66
e27efab
895141b
2ee3ecc
c05213f
2ee3ecc
 
76aff4b
 
2ee3ecc
 
1b50b66
e27efab
d44d169
1b50b66
e27efab
2ee3ecc
1b50b66
76aff4b
b4628ad
76aff4b
 
367a123
76aff4b
 
 
 
 
 
 
 
 
 
 
 
 
2ee3ecc
 
 
 
 
 
 
 
e27efab
76aff4b
 
 
 
2ee3ecc
76aff4b
 
2ee3ecc
76aff4b

import streamlit as st
from transformers import pipeline
import pandas as pd
import re
import nltk
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer

# Fetch the NLTK corpora used by the text preprocessing step
# (stop-word list and WordNet data for the lemmatizer).
for _nltk_resource in ('stopwords', 'wordnet'):
    nltk.download(_nltk_resource)

# Zero-shot classification pipeline backed by the "Fralet/personality" model.
# NOTE(review): Streamlit re-executes the script on each interaction, so this
# presumably re-creates the pipeline every rerun; consider caching it if the
# installed Streamlit version provides a resource cache -- confirm.
classifier = pipeline(
    "zero-shot-classification",
    model="Fralet/personality",
)

# Candidate labels: the nine Enneagram personality types
default_labels = [
    "Peacemaker",
    "Loyalist",
    "Achiever",
    "Reformer",
    "Individualist",
    "Helper",
    "Challenger",
    "Investigator",
    "Enthusiast",
]

# Streamlit interface setup
st.title("Resume-based Personality Prediction by Serikov Ayanbek")

# Load the responses workbook; the path is relative to the working directory
data = pd.read_excel("ResponseTest.xlsx")

# Preprocess text function
# Matches a lone ASCII lowercase letter standing as its own word.
_SINGLE_LETTER_RE = re.compile(r'\b[a-z]\b')

# Filled on first use so the stop-word set and the lemmatizer are built once
# rather than once per processed row.
_PREPROCESS_RESOURCES = {}


def _get_preprocess_resources():
    """Return ``(stop_words, lemmatizer)``, creating both on the first call."""
    if not _PREPROCESS_RESOURCES:
        _PREPROCESS_RESOURCES['stop_words'] = frozenset(stopwords.words('english'))
        _PREPROCESS_RESOURCES['lemmatizer'] = WordNetLemmatizer()
    return (
        _PREPROCESS_RESOURCES['stop_words'],
        _PREPROCESS_RESOURCES['lemmatizer'],
    )


def preprocess_text(text):
    """Normalize free text into a space-separated string of lemmatized tokens.

    Steps, in order: convert the input with ``str()``, replace every non-word
    character with a space, lowercase, remove stand-alone single letters
    (a-z), split on whitespace, drop English stop words, and lemmatize each
    remaining token with ``WordNetLemmatizer.lemmatize`` (default arguments).

    Args:
        text: Value to clean. Any object is accepted because it is passed
            through ``str()`` first.

    Returns:
        The cleaned tokens joined by single spaces; an empty string when no
        token survives.
    """
    stop_words, lemmatizer = _get_preprocess_resources()

    # Non-word characters become spaces so punctuation never glues tokens together.
    text = re.sub(r'\W', ' ', str(text)).lower()

    # Word-boundary matching removes every isolated letter, including runs of
    # adjacent ones ("a b c") and one at the very start or end of the text.
    text = _SINGLE_LETTER_RE.sub(' ', text)

    # str.split() with no argument already collapses whitespace runs, so no
    # separate whitespace-normalizing substitution is needed.
    return ' '.join(
        lemmatizer.lemmatize(word)
        for word in text.split()
        if word not in stop_words
    )

# User-defined labels option: a comma-separated list that replaces the
# default labels when it contains at least one non-blank entry.
user_labels = st.text_input("Enter custom labels separated by comma (optional)")
# Trim surrounding whitespace and drop blank entries so that input such as
# "Leader, Thinker," does not produce " Thinker" or "" as candidate labels.
custom_labels = [
    label.strip()
    for label in (user_labels or '').split(',')
    if label.strip()
]
# Fall back to the defaults when nothing usable was entered
# (empty input, only blanks, or only commas).
labels = custom_labels or default_labels

# Prediction confidence threshold: slider from 0.0 to 1.0, initially 0.5
confidence_threshold = st.slider("Confidence Threshold", 0.0, 1.0, 0.5)

if st.button("Predict Personality"):
    # Combine relevant text columns: the resume plus questions Q1..Q36
    # (adjust the range if there are more or fewer question columns).
    question_columns = [f'Q{i}' for i in range(1, 37)]
    text_columns = ['CV/Resume'] + question_columns

    # Cells read from Excel may be missing (NaN) or numeric, and str.join
    # raises TypeError on any non-string item, so skip missing cells and
    # stringify the rest. Building a plain list also works when the sheet
    # has no rows.
    data['combined_text'] = [
        ' '.join(str(cell) for cell in row if pd.notna(cell))
        for row in data[text_columns].itertuples(index=False, name=None)
    ]
    data['processed_text'] = data['combined_text'].apply(preprocess_text)

    # Make predictions: one classifier call per row against the chosen labels
    predictions = data['processed_text'].apply(
        lambda text: classifier(text, labels)
    )

    # Extract and display predictions: keep only the labels whose score
    # reaches the threshold selected on the slider.
    data['predicted_labels'] = predictions.apply(
        lambda result: [
            label
            for label, score in zip(result['labels'], result['scores'])
            if score >= confidence_threshold
        ]
    )
    st.dataframe(data[['True_label', 'Predicted', 'predicted_labels']])