# personality / app.py
# Fralet's picture
# Update app.py
# 324d859 verified
# raw
# history blame
# 2.5 kB
import streamlit as st
import pandas as pd
from transformers import pipeline
import re
import nltk
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
# Fetch the NLTK corpora that preprocess_text relies on (stop words, WordNet).
for resource in ('stopwords', 'wordnet'):
    nltk.download(resource)

# Build the zero-shot classification pipeline once at import time.
# NOTE(review): Streamlit re-executes the script on each interaction, so this
# load presumably repeats on every rerun - consider caching; confirm.
classifier = pipeline(
    "zero-shot-classification",
    model="Fralet/personality",
)

# Candidate labels used when the user does not supply custom ones.
default_labels = [
    "Peacemaker",
    "Loyalist",
    "Achiever",
    "Reformer",
    "Individualist",
    "Helper",
    "Challenger",
    "Investigator",
    "Enthusiast",
]

# Page heading.
st.title("Resume-based Personality Prediction by Serikov Ayanbek")

# Read the input spreadsheet; adjust the file path/name as necessary.
data = pd.read_excel("your_excel_file.xlsx")
# Preprocess text function
def preprocess_text(text):
    """Normalize free text into a space-joined string of lemmatized tokens.

    Steps, in order: replace every non-word character with a space,
    lowercase, remove single ASCII letters that stand alone between
    whitespace or at the very start, collapse whitespace runs, drop English
    stop words, and lemmatize the remaining tokens with WordNetLemmatizer
    using its default settings.

    Args:
        text: Any value; it is coerced with ``str()`` before processing.

    Returns:
        The surviving tokens joined by single spaces (an empty string when
        no token survives).
    """
    text = re.sub(r'\W', ' ', str(text))
    text = text.lower()
    # Remove stray single letters (in the middle, then at the start).
    text = re.sub(r'\s+[a-z]\s+', ' ', text)
    text = re.sub(r'^[a-z]\s+', ' ', text)
    # Fixed: this line was `text is re.sub(...)`, an identity comparison whose
    # result was thrown away, so the whitespace collapse never took effect.
    text = re.sub(r'\s+', ' ', text)
    stop_words = set(stopwords.words('english'))
    lemmatizer = WordNetLemmatizer()
    tokens = text.split()
    tokens = [lemmatizer.lemmatize(word) for word in tokens if word not in stop_words]
    return ' '.join(tokens)
# Combine relevant text columns: 'CV/Resume' plus 'Q1' .. 'Q36'.
text_columns = ['CV/Resume'] + [f'Q{i}' for i in range(1, 37)]
# Blank spreadsheet cells load as NaN (a float) and numeric cells load as
# numbers; ' '.join raises TypeError on either, so replace missing values with
# '' and cast everything to str before joining each row.
data['combined_text'] = (
    data[text_columns].fillna('').astype(str).agg(' '.join, axis=1)
)
# Clean the joined text for the classifier.
data['processed_text'] = data['combined_text'].apply(preprocess_text)
# Streamlit user inputs
user_labels = st.text_input("Enter custom labels separated by comma (optional)")
# Trim whitespace around each label and discard empty entries so input such as
# "a, b," or a whitespace-only string never sends blank or padded labels to the
# classifier; fall back to the defaults when no usable label remains.
custom_labels = [label.strip() for label in user_labels.split(',') if label.strip()]
labels = custom_labels or default_labels
# Minimum score a label needs in order to be listed in the results.
confidence_threshold = st.slider("Confidence Threshold", 0.0, 1.0, 0.5)
if st.button("Predict Personality"):

    def _classify(processed):
        """Run the zero-shot classifier on one text against the active labels."""
        return classifier(processed, labels)

    def _labels_above_threshold(prediction):
        """Map each label scoring at or above the threshold to a percent string."""
        kept = {}
        for label, score in zip(prediction['labels'], prediction['scores']):
            if score >= confidence_threshold:
                kept[label] = f"{score*100:.2f}%"
        return kept

    def _max_labels(row):
        """Collect the row's MAX1, MAX2 and MAX3 values into a list."""
        return [row[column] for column in ('MAX1', 'MAX2', 'MAX3')]

    # Classify every processed text, then keep only the confident labels.
    data['predictions'] = data['processed_text'].apply(_classify)
    data['predicted_labels'] = data['predictions'].apply(_labels_above_threshold)
    # Gather the three MAX columns so they can be shown next to the predictions.
    data['MAX_labels'] = data.apply(_max_labels, axis=1)

    st.write("Predictions and Labels:")
    result_columns = ['True_label', 'Predicted', 'predicted_labels', 'MAX_labels']
    st.dataframe(data[result_columns])

# Always show the full DataFrame, whether or not the button was pressed.
st.dataframe(data)