# personality/app.py
import streamlit as st
import pandas as pd
from transformers import pipeline
import re
import nltk
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
# Download necessary NLTK resources
nltk.download('stopwords')
nltk.download('wordnet')
# Initialize the zero-shot classification pipeline
classifier = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")
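# Note (suggestion, not in the original): Streamlit re-runs module-level code on
# every widget interaction, so the model is re-initialized on each click. A
# minimal sketch of a cached loader (assumes a Streamlit version that provides
# st.cache_resource):
#
#   @st.cache_resource
#   def load_classifier():
#       return pipeline("zero-shot-classification", model="facebook/bart-large-mnli")
#   classifier = load_classifier()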
# Streamlit interface setup
st.title("Resume-based Personality Prediction by Serikov Ayanbek")
resume_text = st.text_area("Enter Resume Text Here", height=300)
# Load data from Excel
data = pd.read_excel("ResponseTest.xlsx") # Replace 'ResponseTest.xlsx' with your actual file name
data_open = pd.read_excel("ResponseOpen.xlsx")  # Replace 'ResponseOpen.xlsx' with your actual file name
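# Expected columns (inferred from the code below):
#   ResponseTest.xlsx: 'CV/Resume', 'Q1'..'Q36', 'MAX1', 'MAX2', 'MAX3', 'True_label'
#   ResponseOpen.xlsx: 'Demo_F', 'Demo_M', 'Question', 'True_label'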
# Preprocess text: lowercase, strip non-word characters and stray single
# letters, collapse whitespace, then drop stopwords and lemmatize.
def preprocess_text(text):
    text = re.sub(r'\W', ' ', str(text))      # replace non-word characters with spaces
    text = text.lower()
    text = re.sub(r'\s+[a-z]\s+', ' ', text)  # drop single letters inside the text
    text = re.sub(r'^[a-z]\s+', ' ', text)    # drop a single leading letter
    text = re.sub(r'\s+', ' ', text)          # collapse repeated whitespace
    stop_words = set(stopwords.words('english'))
    lemmatizer = WordNetLemmatizer()
    tokens = text.split()
    tokens = [lemmatizer.lemmatize(word) for word in tokens if word not in stop_words]
    return ' '.join(tokens)
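# Illustrative example: preprocess_text("The candidates were managing teams!")
# returns "candidate managing team" (stopwords dropped, nouns lemmatized).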
# Combine relevant text columns for processing
question_columns = [f'Q{i}' for i in range(1, 37)]  # Adjust the range based on your data columns
# Cast to str first so the join does not fail on numeric or missing cells
data['processed_text'] = data[['CV/Resume'] + question_columns].astype(str).agg(', '.join, axis=1)
#data['processed_text'] = data[['CV/Resume'] + question_columns].agg(lambda x: ', '.join(x), axis=1).apply(preprocess_text)
#data_open['processed_text_open'] = data_open[['CV/Resume', 'Question']].agg(' '.join, axis=1).apply(preprocess_text)
data_open['processed_text_open'] = data_open[['Demo_F', 'Question']].astype(str).agg(' '.join, axis=1)
data_open['processed_text_mopen'] = data_open[['Demo_M', 'Question']].astype(str).agg(' '.join, axis=1)
labels = ["Peacemaker", "Loyalist", "Achiever", "Reformer", "Individualist", "Helper", "Challenger", "Investigator", "Enthusiast"]
# Prediction confidence threshold
confidence_threshold = st.slider("Confidence Threshold", 0.0, 1.0, 0.5)
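# Note: confidence_threshold is not yet wired into the prediction functions
# below. A minimal sketch of how it could gate a prediction (the zero-shot
# pipeline returns 'scores' sorted to match 'labels', highest first):
#
#   if result['scores'][0] >= confidence_threshold:
#       return result['labels'][0]
#   return "Uncertain"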
if st.button("Predict Personality by Test"):
# Function to apply predictions using dynamic labels from MAX1, MAX2, MAX3 and only return the highest scored label
def get_predictions(row):
custom_labels = [row['MAX1'], row['MAX2'], row['MAX3']] # Get labels from each row
processed_text = row['processed_text']
result = classifier(processed_text, custom_labels)
highest_score_label = result['labels'][0] # Assumes the labels are sorted by score, highest first
return highest_score_label
# Apply predictions across all rows
data['Predicted'] = data.apply(get_predictions, axis=1)
st.dataframe(data[['True_label','MAX1','MAX2','MAX3', 'Predicted']])
if st.button("Predict Personality by Open Question F"):
def get_predictions(row):
processed_text = row['processed_text_open']
result = classifier(processed_text, labels)
highest_score_label = result['labels'][0] # Assumes the labels are sorted by score, highest first
return highest_score_label
# Apply predictions across all rows
data_open['Predicted_F'] = data_open.apply(get_predictions, axis=1)
st.dataframe(data_open[['True_label', 'Predicted_F']])
if st.button("Predict Personality by Open Question M"):
def get_predictionsM(row):
processed_text = row['processed_text_mopen']
result = classifier(processed_text, labels)
highest_score_label = result['labels'][0] # Assumes the labels are sorted by score, highest first
return highest_score_label
# Apply predictions across all rows
data_open['Predicted_M'] = data_open.apply(get_predictionsM, axis=1)
st.dataframe(data_open[['True_label', 'Predicted_M']])
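# Sketch (an addition, not in the original flow): classify the resume pasted
# into the text area above against the nine Enneagram labels, applying the
# confidence threshold from the slider. Assumes the pipeline's 'scores' are
# sorted to match 'labels', highest first.
if st.button("Predict Personality from Pasted Resume"):
    if resume_text.strip():
        result = classifier(preprocess_text(resume_text), labels)
        if result['scores'][0] >= confidence_threshold:
            st.write(f"Predicted type: {result['labels'][0]} (score {result['scores'][0]:.2f})")
        else:
            st.write("No label met the confidence threshold.")
    else:
        st.warning("Please enter resume text above.")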