Spaces:
Sleeping
Sleeping
File size: 2,961 Bytes
1b50b66 895141b 324d859 2ee3ecc c05213f 2ee3ecc 76aff4b 2ee3ecc 1b50b66 e27efab 7711d36 1b50b66 76aff4b b4628ad d605d91 76aff4b bb3c6bc 76aff4b bb3c6bc 76aff4b a1a24b4 76aff4b 75a0105 76aff4b 2ee3ecc bb3c6bc d605d91 6297210 2ee3ecc bb3c6bc 48b2405 5b10278 d605d91 bb3c6bc |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 |
import streamlit as st
import pandas as pd
from transformers import pipeline
import re
import nltk
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
# Download necessary NLTK resources (stopword list and WordNet data used by
# the preprocessing step below; these are no-ops if already cached locally).
nltk.download('stopwords')
nltk.download('wordnet')
# Initialize the zero-shot classification pipeline
# NOTE(review): the bart-large-mnli model is reloaded on every Streamlit rerun
# of this script; consider caching it (e.g. st.cache_resource) — confirm the
# app's Streamlit version supports it.
classifier = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")
# Streamlit interface setup
st.title("Resume-based Personality Prediction by Serikov Ayanbek")
# Free-text input; the prediction block further down runs whenever this is non-empty.
resume_text = st.text_area("Enter Resume Text Here", height=300)
# Load data from Excel workbooks expected to sit beside this script.
data = pd.read_excel("ResponseTest.xlsx")
data_open = pd.read_excel("ResponseOpen.xlsx")
# Text preprocessing helpers.
# Compile the regexes and build the NLTK helpers once at import time:
# preprocess_text is invoked per-row via .apply() below, and the original
# version rebuilt the stopword set and lemmatizer on every call.
_NON_WORD_RE = re.compile(r'\W')
_INNER_SINGLE_CHAR_RE = re.compile(r'\s+[a-z]\s+')
_LEADING_SINGLE_CHAR_RE = re.compile(r'^[a-z]\s+')
_MULTI_SPACE_RE = re.compile(r'\s+')
_STOP_WORDS = set(stopwords.words('english'))
_LEMMATIZER = WordNetLemmatizer()

def preprocess_text(text):
    """Normalize free text for zero-shot classification.

    Steps: replace non-word characters with spaces, lowercase, strip stray
    single letters, collapse whitespace, then drop English stopwords and
    lemmatize the remaining tokens.

    Parameters
    ----------
    text : object
        Raw text; non-strings (e.g. NaN cells from pandas) are coerced
        with ``str()``.

    Returns
    -------
    str
        Space-joined, lemmatized, stopword-free tokens.
    """
    text = _NON_WORD_RE.sub(' ', str(text)).lower()
    text = _INNER_SINGLE_CHAR_RE.sub(' ', text)
    text = _LEADING_SINGLE_CHAR_RE.sub(' ', text)
    text = _MULTI_SPACE_RE.sub(' ', text)
    tokens = [
        _LEMMATIZER.lemmatize(word)
        for word in text.split()
        if word not in _STOP_WORDS
    ]
    return ' '.join(tokens)
# Prepare the data for prediction.
# Cast every cell to str before joining: Excel-sourced columns may contain
# NaN or numeric values, and str.join raises TypeError on non-strings.
question_cols = ['CV/Resume'] + [f'Q{i}' for i in range(1, 37)]
data['processed_text'] = (
    data[question_cols].astype(str).agg(', '.join, axis=1).apply(preprocess_text)
)
data_open['processed_text_open'] = (
    data_open[['Demo_F', 'Question']].astype(str).agg(' '.join, axis=1).apply(preprocess_text)
)
data_open['processed_text_mopen'] = (
    data_open[['Demo_M', 'Question']].astype(str).agg(' '.join, axis=1).apply(preprocess_text)
)
# Enneagram personality types used as the zero-shot candidate label set.
labels = ["Peacemaker", "Loyalist", "Achiever", "Reformer", "Individualist", "Helper", "Challenger", "Investigator", "Enthusiast"]
# NOTE(review): this threshold is collected from the user but never applied to
# any prediction below — confirm whether score filtering was intended.
confidence_threshold = st.slider("Confidence Threshold", 0.0, 1.0, 0.5)
# Automatic prediction on resume text input: runs whenever the text area is
# non-empty (Streamlit re-executes the whole script on each input change).
if resume_text:
    processed_resume = preprocess_text(resume_text)
    # Zero-shot classify the cleaned resume against the nine candidate labels.
    resume_prediction = classifier(processed_resume, labels)
    # The pipeline returns labels sorted by score, highest first.
    highest_score_label = resume_prediction['labels'][0]
    st.write("Predicted Personality for the given resume:", highest_score_label)
# Automatic prediction for each row in DataFrame using custom labels
def predict_personality(row):
    """Classify one row's processed text against its own top-3 label set.

    The candidate labels come from the row's 'MAX1'/'MAX2'/'MAX3' columns;
    the best-scoring label is returned (the zero-shot pipeline sorts its
    output labels by score, highest first).
    """
    candidates = [row[key] for key in ('MAX1', 'MAX2', 'MAX3')]
    outcome = classifier(row['processed_text'], candidates)
    return outcome['labels'][0]
# Run the per-row zero-shot prediction and show results beside the ground truth.
data['Predicted'] = data.apply(predict_personality, axis=1)
st.dataframe(data[['True_label', 'MAX1', 'MAX2', 'MAX3', 'Predicted']])
# Separate predictions for the Female- and Male-phrased question texts.
# Collect results in plain lists and assign each column once: per-cell
# .at writes inside iterrows() are slow and create the columns implicitly
# one cell at a time.
predicted_f = []
predicted_m = []
for _, row in data_open.iterrows():
    predicted_f.append(classifier(row['processed_text_open'], labels)['labels'][0])
    predicted_m.append(classifier(row['processed_text_mopen'], labels)['labels'][0])
data_open['Predicted_F'] = predicted_f
data_open['Predicted_M'] = predicted_m
st.dataframe(data_open[['True_label', 'Predicted_F', 'Predicted_M']])
|