personality / app.py
Fralet's picture
Update app.py
c05213f verified
raw
history blame
2.03 kB
import streamlit as st
from transformers import pipeline
import re
import nltk
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
# Fetch the NLTK corpora needed by the text preprocessing step. quiet=True
# keeps the downloader's progress output out of the logs, which would
# otherwise be repeated on every Streamlit rerun of this script.
nltk.download('stopwords', quiet=True)
nltk.download('wordnet', quiet=True)

# Streamlit re-executes the whole script on each widget interaction, so the
# model is built through st.cache_resource to construct it only once per
# server process. Older Streamlit releases do not provide cache_resource;
# in that case fall back to a plain (uncached) load, as before.
_cache_resource = getattr(st, "cache_resource", lambda func: func)


@_cache_resource
def _load_classifier():
    """Build and return the zero-shot classification pipeline."""
    return pipeline("zero-shot-classification", model="facebook/bart-large-mnli")


# Initialize the zero-shot classification pipeline
classifier = _load_classifier()
# Define the candidate labels according to the Enneagram types
default_labels = [
    "Peacemaker",
    "Loyalist",
    "Achiever",
    "Reformer",
    "Individualist",
    "Helper",
    "Challenger",
    "Investigator",
    "Enthusiast",
]

# Streamlit interface
st.title("Resume-based Personality Prediction")
resume_text = st.text_area("Enter Resume Text Here", height=300)

# User-defined labels option.
# Each comma-separated entry is stripped of surrounding whitespace and empty
# entries (e.g. from a trailing comma or ", ,") are dropped, so no blank or
# space-padded label is handed to the classifier. If nothing usable remains,
# the default labels are used.
user_labels = st.text_input("Enter custom labels separated by comma (optional)")
custom_labels = [
    entry.strip() for entry in (user_labels or "").split(',') if entry.strip()
]
labels = custom_labels or default_labels

# Prediction confidence threshold
confidence_threshold = st.slider("Confidence Threshold", 0.0, 1.0, 0.5)
def preprocess_text(text):
    """Normalize free text before it is passed to the classifier.

    Steps, in order: replace every non-word character with a space,
    lowercase, remove isolated single letters, collapse runs of whitespace,
    drop English stop words, and lemmatize the remaining tokens with
    WordNetLemmatizer.

    Args:
        text: The raw input; it is coerced with ``str()`` first.

    Returns:
        The cleaned tokens joined by single spaces. This is an empty string
        when the input is empty or contains only stop words / punctuation.
    """
    text = re.sub(r'\W', ' ', str(text))
    text = text.lower()
    # Remove single letters standing alone (mid-text, then at the start).
    text = re.sub(r'\s+[a-z]\s+', ' ', text)
    text = re.sub(r'^[a-z]\s+', ' ', text)
    text = re.sub(r'\s+', ' ', text)
    stop_words = set(stopwords.words('english'))
    lemmatizer = WordNetLemmatizer()
    tokens = [
        lemmatizer.lemmatize(word)
        for word in text.split()
        if word not in stop_words
    ]
    return ' '.join(tokens)


if st.button("Predict Personality"):
    # Text Preprocessing
    processed_text = preprocess_text(resume_text)

    if not processed_text:
        # Nothing meaningful to classify (empty box, or only stop words and
        # punctuation): tell the user instead of sending "" to the model.
        st.warning("Please enter some resume text before predicting.")
    else:
        # Make prediction
        result = classifier(processed_text, labels)

        # Display the results
        # NOTE(review): per the transformers docs, with the default
        # multi_label=False the scores are normalized to sum to 1 across all
        # labels, so a high threshold may filter out every label.
        st.write("Predictions (above confidence threshold):")
        passing = [
            (label, score)
            for label, score in zip(result['labels'], result['scores'])
            if score >= confidence_threshold
        ]
        for label, score in passing:
            st.write(f"{label}: {score*100:.2f}%")
        if not passing:
            st.write("No predictions exceed the confidence threshold.")