Spaces:
Running
Running
File size: 2,498 Bytes
1b50b66 895141b 324d859 2ee3ecc c05213f 2ee3ecc 76aff4b 2ee3ecc 1b50b66 e27efab d44d169 1b50b66 324d859 2ee3ecc 1b50b66 76aff4b b4628ad 76aff4b 324d859 76aff4b 324d859 76aff4b 2ee3ecc 324d859 2ee3ecc e27efab 324d859 2ee3ecc 324d859 2ee3ecc 324d859 76aff4b 324d859 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 |
import streamlit as st
import pandas as pd
from transformers import pipeline
import re
import nltk
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
# Fetch the NLTK corpora that the text preprocessing relies on
for _resource in ('stopwords', 'wordnet'):
    nltk.download(_resource)

# Zero-shot classification pipeline used to score each text against the labels
classifier = pipeline(
    "zero-shot-classification",
    model="Fralet/personality",
)

# Candidate labels used when the user does not supply custom ones
default_labels = [
    "Peacemaker",
    "Loyalist",
    "Achiever",
    "Reformer",
    "Individualist",
    "Helper",
    "Challenger",
    "Investigator",
    "Enthusiast",
]

# Page title of the Streamlit app
st.title("Resume-based Personality Prediction by Serikov Ayanbek")

# Read the input table from Excel (adjust the file path/name as necessary)
data = pd.read_excel("your_excel_file.xlsx")
# Preprocess text function
def preprocess_text(text):
    """Normalize raw text into a lowercase, lemmatized, stop-word-free string.

    Args:
        text: Value to clean. It is converted with ``str()`` first, so
            non-string values are accepted.

    Returns:
        The surviving tokens joined by single spaces.
    """
    # Replace every non-word character with a space, then lowercase
    text = re.sub(r'\W', ' ', str(text))
    text = text.lower()
    # Drop single-letter tokens in the middle and at the very start of the text
    text = re.sub(r'\s+[a-z]\s+', ' ', text)
    text = re.sub(r'^[a-z]\s+', ' ', text)
    # Collapse whitespace runs. This must be an assignment: writing it as an
    # `is` comparison evaluates the substitution and throws the result away.
    text = re.sub(r'\s+', ' ', text)
    stop_words = set(stopwords.words('english'))
    lemmatizer = WordNetLemmatizer()
    tokens = text.split()
    # Lemmatize every token that is not an English stop word
    tokens = [lemmatizer.lemmatize(word) for word in tokens if word not in stop_words]
    return ' '.join(tokens)
# Combine relevant text columns: the resume plus the answers Q1..Q36
text_columns = ['CV/Resume'] + [f'Q{i}' for i in range(1, 37)]
# Join each row's cells with spaces. Cells are converted to str and missing
# values are skipped, because ' '.join raises TypeError on NaN or numeric cells.
data['combined_text'] = data[text_columns].apply(
    lambda row: ' '.join(str(value) for value in row if pd.notna(value)),
    axis=1,
)
# Cleaned version of the combined text, used as classifier input
data['processed_text'] = data['combined_text'].apply(preprocess_text)
# Streamlit user inputs
user_labels = st.text_input("Enter custom labels separated by comma (optional)")
# Strip surrounding whitespace and drop blank entries so input such as
# "A, B," yields ['A', 'B'] rather than ['A', ' B', '']; fall back to the
# default labels when nothing usable was entered.
labels = [label.strip() for label in user_labels.split(',') if label.strip()] or default_labels
# Minimum score a label needs in order to be reported
confidence_threshold = st.slider("Confidence Threshold", 0.0, 1.0, 0.5)
if st.button("Predict Personality"):
    # Run the classifier on each processed text with the active label set
    def _classify(processed):
        return classifier(processed, labels)

    data['predictions'] = data['processed_text'].apply(_classify)

    # Keep only the labels whose score reaches the threshold, as percentages
    def _confident_labels(result):
        kept = {}
        for name, score in zip(result['labels'], result['scores']):
            if score >= confidence_threshold:
                kept[name] = f"{score*100:.2f}%"
        return kept

    data['predicted_labels'] = data['predictions'].apply(_confident_labels)
    # Gather the MAX1..MAX3 columns into one list per row
    data['MAX_labels'] = data.apply(
        lambda row: [row['MAX1'], row['MAX2'], row['MAX3']],
        axis=1,
    )
    st.write("Predictions and Labels:")
    st.dataframe(data[['True_label', 'Predicted', 'predicted_labels', 'MAX_labels']])
# Show the full DataFrame regardless of whether the button was pressed
st.dataframe(data)