Spaces:
Sleeping
Sleeping
File size: 3,278 Bytes
1b50b66 895141b 324d859 2ee3ecc c05213f 2ee3ecc 76aff4b 2ee3ecc 1b50b66 e27efab 7711d36 1b50b66 76aff4b b4628ad d605d91 76aff4b a1a24b4 d605d91 76aff4b a1a24b4 76aff4b 75a0105 76aff4b 2ee3ecc 75a0105 870e773 ba8654a d605d91 6297210 324d859 75a0105 2ee3ecc d605d91 a1a24b4 75a0105 ba8654a 75a0105 ba8654a a1a24b4 75a0105 a1a24b4 26903af d605d91 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 |
import streamlit as st
import pandas as pd
from transformers import pipeline
import re
import nltk
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
# Download the NLTK corpora used by preprocess_text (stop words and WordNet).
nltk.download('stopwords')
nltk.download('wordnet')


@st.cache_resource
def _load_classifier():
    """Build the zero-shot classification pipeline and let Streamlit cache it.

    Streamlit re-executes this script on every widget interaction (slider
    move, button click). Without caching, the large BART model would be
    re-instantiated on each rerun. Requires Streamlit >= 1.18, where
    ``st.cache_resource`` was introduced.
    """
    return pipeline("zero-shot-classification", model="facebook/bart-large-mnli")


# Initialize the zero-shot classification pipeline (cached across reruns).
classifier = _load_classifier()

# Streamlit interface setup
st.title("Resume-based Personality Prediction by Serikov Ayanbek")
# NOTE(review): resume_text is not read by any code visible in this file;
# confirm whether the free-text input was meant to feed the classifier.
resume_text = st.text_area("Enter Resume Text Here", height=300)

# Load the two response sheets from Excel.
data = pd.read_excel("ResponseTest.xlsx")  # Replace 'ResponseTest.xlsx' with your actual file name
data_open = pd.read_excel("ResponseOpen.xlsx")  # Replace 'ResponseOpen.xlsx' with your actual file name
# Text normalisation helper
def preprocess_text(text):
    """Normalise *text* into a space-separated string of lemmatized tokens.

    The input is converted with ``str()``, every non-word character is
    replaced by a space, and the result is lower-cased. Single lowercase
    letters surrounded by whitespace, and a single leading letter followed by
    whitespace, are replaced by a space; runs of whitespace are then
    collapsed. Tokens found in NLTK's English stop-word list are dropped and
    the remaining tokens are passed through ``WordNetLemmatizer.lemmatize``.
    """
    cleaned = re.sub(r'\W', ' ', str(text)).lower()
    # Order matters: strip isolated letters first, then collapse whitespace.
    for pattern in (r'\s+[a-z]\s+', r'^[a-z]\s+', r'\s+'):
        cleaned = re.sub(pattern, ' ', cleaned)

    ignored = set(stopwords.words('english'))
    lemmatize = WordNetLemmatizer().lemmatize
    kept = (lemmatize(token) for token in cleaned.split() if token not in ignored)
    return ' '.join(kept)
# Combine relevant text columns for processing
question_columns = [f'Q{i}' for i in range(1, 37)]  # Q1..Q36; adjust the range based on your data columns


def _join_row_text(row):
    """Return the row's non-missing cell values joined by single spaces.

    Each value is passed through ``str()`` so that numeric cells do not make
    ``str.join`` raise ``TypeError``; missing cells (NaN/None, e.g. empty
    Excel cells) are skipped. For rows that contain only strings the result
    is identical to ``' '.join(row)``.
    """
    return ' '.join(str(value) for value in row if not pd.isna(value))


data['processed_text'] = data[['CV/Resume'] + question_columns].apply(_join_row_text, axis=1)
data_open['processed_text_open'] = data_open[['CV/Resume', 'Question']].apply(_join_row_text, axis=1)
# To normalise the combined text before classification, chain
# .apply(preprocess_text) onto either assignment above.

# Candidate labels used by the open-question prediction.
labels = ["Peacemaker", "Loyalist", "Achiever", "Reformer", "Individualist", "Helper", "Challenger", "Investigator", "Enthusiast"]

# Prediction confidence threshold
# NOTE(review): confidence_threshold is not read by either prediction handler
# visible in this file; confirm whether low-score predictions should be filtered.
confidence_threshold = st.slider("Confidence Threshold", 0.0, 1.0, 0.5)
if st.button("Predict Personality by Test"):
    def get_predictions(row):
        """Return the top-ranked label for one row of the test sheet.

        The candidate labels are taken from the row's own MAX1, MAX2 and MAX3
        columns, and the row's 'processed_text' is classified against them.
        The first entry of the classifier's 'labels' output is returned; this
        assumes the labels are sorted by score, highest first.
        """
        custom_labels = [row['MAX1'], row['MAX2'], row['MAX3']]
        result = classifier(row['processed_text'], custom_labels)
        return result['labels'][0]

    # Apply predictions across all rows
    data['Predicted'] = data.apply(get_predictions, axis=1)
    # The comma between 'processed_text' and 'Predicted' is required: without
    # it Python concatenates the adjacent literals into the single,
    # non-existent column name 'processed_textPredicted'.
    st.dataframe(data[['True_label', 'MAX1', 'MAX2', 'MAX3', 'processed_text', 'Predicted']])
if st.button("Predict Personality by Open Question"):
    def get_predictions(row):
        """Classify one row's open-question text against the shared label list.

        Returns the first entry of the classifier's 'labels' output, which is
        assumed to be ordered from highest to lowest score.
        """
        ranking = classifier(row['processed_text_open'], labels)
        return ranking['labels'][0]

    # Predict for every row, then show each prediction beside its true label.
    data_open['Predicted'] = data_open.apply(get_predictions, axis=1)
    st.dataframe(data_open[['True_label', 'Predicted']])
|