Spaces:

Fralet
/

personality

Running

File size: 2,025 Bytes

1b50b66
e27efab
2ee3ecc
c05213f
2ee3ecc
 
 
 
1b50b66
e27efab
d44d169
1b50b66
e27efab
2ee3ecc
1b50b66
e27efab
 
 
2ee3ecc
 
 
 
 
 
 
 
e27efab
2ee3ecc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e27efab
2ee3ecc
 
e27efab
2ee3ecc
 
e27efab
2ee3ecc

import streamlit as st
from transformers import pipeline
import re
import nltk
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
nltk.download('stopwords')
nltk.download('wordnet')

# Initialize the zero-shot classification pipeline
# NOTE(review): Streamlit is documented to re-run the script on every widget
# interaction, so this load presumably repeats on each rerun -- consider
# caching it (e.g. st.cache_resource) if the installed Streamlit provides it.
classifier = pipeline("zero-shot-classification", model="Fralet/personality")

# Define the candidate labels according to the Enneagram types
default_labels = ["Peacemaker", "Loyalist", "Achiever", "Reformer", "Individualist", "Helper", "Challenger", "Investigator", "Enthusiast"]

# Streamlit interface
st.title("Resume-based Personality Prediction")
resume_text = st.text_area("Enter Resume Text Here", height=300)

# User-defined labels option
user_labels = st.text_input("Enter custom labels separated by comma (optional)")
# Trim whitespace around each custom label and drop blank entries so that
# input such as "Calm, Bold," does not produce " Bold" or "" as labels.
custom_labels = [
    label.strip()
    for label in (user_labels or "").split(',')
    if label.strip()
]
# Fall back to the Enneagram defaults when no usable custom label was given
# (empty field, or only commas/whitespace).
labels = custom_labels or default_labels

# Prediction confidence threshold
confidence_threshold = st.slider("Confidence Threshold", 0.0, 1.0, 0.5)

if st.button("Predict Personality"):
    # Text Preprocessing
    def preprocess_text(text):
        """Normalize raw text into a space-joined string of lemmatized tokens.

        Steps, in order: replace every non-word character with a space,
        lowercase, remove single-letter tokens (between whitespace and at the
        start of the text), collapse whitespace runs, drop English stop words,
        and lemmatize the remaining tokens.

        Args:
            text: Input text; it is converted with ``str()`` before processing.

        Returns:
            The cleaned tokens joined by single spaces. The result is an empty
            string when no token survives the filtering.
        """
        # Replace each non-word character (punctuation, symbols) with a space.
        text = re.sub(r'\W', ' ', str(text))
        text = text.lower()
        # Remove single letters that stand alone between whitespace.
        text = re.sub(r'\s+[a-z]\s+', ' ', text)
        # Remove a single letter at the very start of the text.
        text = re.sub(r'^[a-z]\s+', ' ', text)
        # Collapse any run of whitespace into one space.
        text = re.sub(r'\s+', ' ', text)
        stop_words = set(stopwords.words('english'))
        lemmatizer = WordNetLemmatizer()
        tokens = text.split()
        # lemmatize() is called without a part-of-speech argument.
        tokens = [lemmatizer.lemmatize(word) for word in tokens if word not in stop_words]
        return ' '.join(tokens)

    processed_text = preprocess_text(resume_text)

    if not processed_text:
        # Nothing is left to classify (empty text area, or input made up only
        # of punctuation / stop words): skip the model call instead of
        # sending it an empty sequence.
        st.warning("Please enter some resume text before predicting.")
    else:
        # Make prediction
        result = classifier(processed_text, labels)

        # Display the results
        st.write("Predictions (above confidence threshold):")
        displayed = False
        for label, score in zip(result['labels'], result['scores']):
            if score >= confidence_threshold:
                st.write(f"{label}: {score*100:.2f}%")
                displayed = True
        if not displayed:
            st.write("No predictions exceed the confidence threshold.")