# personality/app.py: Streamlit app for resume-based personality prediction
import streamlit as st
from transformers import pipeline
import pandas as pd
import re
import nltk
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
# Download the NLTK resources needed for stopword removal and lemmatization
nltk.download('stopwords', quiet=True)
nltk.download('wordnet', quiet=True)
nltk.download('omw-1.4', quiet=True)  # some NLTK versions also need this for the WordNet lemmatizer
# Initialize the zero-shot classification pipeline
classifier = pipeline("zero-shot-classification", model="Fralet/personality")
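# Each classifier call returns a dict of the form {'sequence': ..., 'labels': [...], 'scores': [...]},
# with labels sorted by descending score; the filtering step below relies on this shape.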
# Define the candidate labels according to the Enneagram types
default_labels = ["Peacemaker", "Loyalist", "Achiever", "Reformer", "Individualist", "Helper", "Challenger", "Investigator", "Enthusiast"]
# Streamlit interface setup
st.title("Resume-based Personality Prediction by Serikov Ayanbek")
# Load survey responses and resumes from ResponseTest.xlsx (expected alongside app.py)
data = pd.read_excel("ResponseTest.xlsx")
# Preprocess text: remove punctuation and stray single characters, drop stopwords, lemmatize
def preprocess_text(text):
    text = re.sub(r'\W', ' ', str(text))      # replace non-word characters with spaces
    text = text.lower()
    text = re.sub(r'\s+[a-z]\s+', ' ', text)  # drop single characters surrounded by whitespace
    text = re.sub(r'^[a-z]\s+', ' ', text)    # drop a single leading character
    text = re.sub(r'\s+', ' ', text)          # collapse repeated whitespace
    stop_words = set(stopwords.words('english'))
    lemmatizer = WordNetLemmatizer()
    tokens = text.split()
    tokens = [lemmatizer.lemmatize(word) for word in tokens if word not in stop_words]
    return ' '.join(tokens)
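# Example (with the standard English stopword list): preprocess_text("The Cats are Running!") -> "cat running"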
# Optional user-defined labels (comma-separated); fall back to the Enneagram types above
user_labels = st.text_input("Enter custom labels separated by comma (optional)")
labels = [label.strip() for label in user_labels.split(',') if label.strip()] if user_labels else default_labels
# Prediction confidence threshold
confidence_threshold = st.slider("Confidence Threshold", 0.0, 1.0, 0.5)
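# Note: by default the zero-shot pipeline runs single-label classification, so each row's scores
# sum to 1 across the candidate labels; with nine labels, a high threshold may leave some rows
# with no predicted label at all.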
if st.button("Predict Personality"):
    # Combine the resume text with the question-response columns Q1..Q36
    question_columns = [f'Q{i}' for i in range(1, 37)]  # adjust the range if the sheet has more or fewer question columns
    data['combined_text'] = data[['CV/Resume'] + question_columns].astype(str).agg(' '.join, axis=1)
    data['processed_text'] = data['combined_text'].apply(preprocess_text)

    # Run zero-shot classification on each processed row
    predictions = data['processed_text'].apply(lambda x: classifier(x, candidate_labels=labels))
    # Keep only the labels whose scores meet the confidence threshold
    data['predicted_labels'] = predictions.apply(
        lambda x: [label for label, score in zip(x['labels'], x['scores']) if score >= confidence_threshold]
    )
    st.dataframe(data[['True_label', 'predicted_labels']])  # 'True_label' is assumed to be a column in the Excel file
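# To run the app locally (assuming Streamlit is installed): streamlit run app.py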