import streamlit as st
import pandas as pd
import re
import nltk
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from transformers import pipeline

# Download necessary NLTK resources
nltk.download('stopwords')
nltk.download('wordnet')

# Initialize the zero-shot classification pipeline
classifier = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")

# Streamlit interface setup
st.title("Resume-based Personality Prediction by Serikov Ayanbek")
resume_text = st.text_area("Enter Resume Text Here", height=300)

# Load the survey/resume data from Excel
data = pd.read_excel("ResponseTest.xlsx")
data_open = pd.read_excel("ResponseOpen.xlsx")

# Preprocessing: strip non-word characters, lowercase, drop stray single letters,
# collapse whitespace, remove stopwords, and lemmatize
def preprocess_text(text):
    text = re.sub(r'\W', ' ', str(text))
    text = text.lower()
    text = re.sub(r'\s+[a-z]\s+', ' ', text)
    text = re.sub(r'^[a-z]\s+', ' ', text)
    text = re.sub(r'\s+', ' ', text)
    stop_words = set(stopwords.words('english'))
    lemmatizer = WordNetLemmatizer()
    tokens = text.split()
    tokens = [lemmatizer.lemmatize(word) for word in tokens if word not in stop_words]
    return ' '.join(tokens)

# Prepare the data for prediction (cast to str first so numeric cells don't break the join)
data['processed_text'] = (
    data[['CV/Resume'] + [f'Q{i}' for i in range(1, 37)]]
    .astype(str)
    .agg(', '.join, axis=1)
    .apply(preprocess_text)
)
data_open['processed_text_open'] = (
    data_open[['Demo_F', 'Question']].astype(str).agg(' '.join, axis=1).apply(preprocess_text)
)
data_open['processed_text_mopen'] = (
    data_open[['Demo_M', 'Question']].astype(str).agg(' '.join, axis=1).apply(preprocess_text)
)

# Candidate personality labels for zero-shot classification
labels = ["Peacemaker", "Loyalist", "Achiever", "Reformer", "Individualist",
          "Helper", "Challenger", "Investigator", "Enthusiast"]

confidence_threshold = st.slider("Confidence Threshold", 0.0, 1.0, 0.5)

# Automatic prediction on resume text input
if resume_text:
    processed_resume = preprocess_text(resume_text)
    resume_prediction = classifier(processed_resume, labels)
    highest_score_label = resume_prediction['labels'][0]
    st.write("Predicted Personality for the given resume:", highest_score_label)

# Automatic prediction for each row in the DataFrame
for index, row in data.iterrows():
    result = classifier(row['processed_text'], labels)
    data.at[index, 'Predicted'] = result['labels'][0]
st.dataframe(data[['True_label', 'Predicted']])

# Separate predictions for the Female and Male question variants
for index, row in data_open.iterrows():
    result_f = classifier(row['processed_text_open'], labels)
    result_m = classifier(row['processed_text_mopen'], labels)
    data_open.at[index, 'Predicted_F'] = result_f['labels'][0]
    data_open.at[index, 'Predicted_M'] = result_m['labels'][0]
st.dataframe(data_open[['True_label', 'Predicted_F', 'Predicted_M']])
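
# Optional sanity check, a minimal sketch: assumes the 'True_label' columns hold the
# same nine label strings used in `labels` above, so exact string comparison is meaningful.
accuracy = (data['True_label'] == data['Predicted']).mean()
accuracy_f = (data_open['True_label'] == data_open['Predicted_F']).mean()
accuracy_m = (data_open['True_label'] == data_open['Predicted_M']).mean()
st.write(f"Agreement with True_label (ResponseTest): {accuracy:.2%}")
st.write(f"Agreement with True_label (ResponseOpen, female demo): {accuracy_f:.2%}")
st.write(f"Agreement with True_label (ResponseOpen, male demo): {accuracy_m:.2%}")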