Spaces:

Fralet
/

personality

Sleeping

App Files Files Community

personality / app.py

Fralet

Update app.py

bb3c6bc verified 8 months ago

raw

history blame

2.72 kB

	import streamlit as st
	import pandas as pd
	from transformers import pipeline
	import re
	import nltk
	from nltk.corpus import stopwords
	from nltk.stem import WordNetLemmatizer

	# Download necessary NLTK resources
	nltk.download('stopwords')
	nltk.download('wordnet')


	@st.cache_resource
	def _load_classifier():
	    """Build the zero-shot classification pipeline once per server process.

	    Streamlit re-executes this script on every widget interaction; without
	    caching, the facebook/bart-large-mnli model would be re-instantiated on
	    each rerun. ``st.cache_resource`` keeps a single shared instance alive.
	    """
	    return pipeline("zero-shot-classification", model="facebook/bart-large-mnli")


	# Initialize the zero-shot classification pipeline (cached across reruns)
	classifier = _load_classifier()

	# Streamlit interface setup
	st.title("Resume-based Personality Prediction by Serikov Ayanbek")
	resume_text = st.text_area("Enter Resume Text Here", height=300)

	# Load data from Excel
	data = pd.read_excel("ResponseTest.xlsx")
	data_open = pd.read_excel("ResponseOpen.xlsx")

	# Define preprocessing function
	# Patterns used by preprocess_text, compiled once per script run.
	_NON_WORD_RE = re.compile(r'\W')
	# The trailing whitespace is a lookahead so it is not consumed: re.sub
	# matches cannot overlap, and consuming it would leave every second lone
	# letter in runs such as "x y z" untouched.
	_INNER_SINGLE_LETTER_RE = re.compile(r'\s+[a-z](?=\s)')
	_LEADING_SINGLE_LETTER_RE = re.compile(r'^[a-z]\s+')
	_WHITESPACE_RUN_RE = re.compile(r'\s+')

	# Filled on first use with the stop-word set and the lemmatizer so that
	# they are not rebuilt for every row passed through preprocess_text.
	_TEXT_TOOLS = {}


	def _get_text_tools():
	    """Return the (stop-word set, lemmatizer) pair, creating it on first use."""
	    if not _TEXT_TOOLS:
	        stop_words = frozenset(stopwords.words('english'))
	        lemmatizer = WordNetLemmatizer()
	        _TEXT_TOOLS.update(stop_words=stop_words, lemmatizer=lemmatizer)
	    return _TEXT_TOOLS['stop_words'], _TEXT_TOOLS['lemmatizer']


	def preprocess_text(text):
	    """Normalize free text into a space-separated string of lemmatized tokens.

	    The input is converted with ``str()``, non-word characters are replaced
	    by spaces, the text is lower-cased, isolated single letters (at the start
	    of the text or surrounded by whitespace) are dropped, whitespace runs are
	    collapsed, English stop words are removed, and each remaining token is
	    lemmatized with WordNet.

	    Args:
	        text: Any value; it is passed through ``str()`` before processing.

	    Returns:
	        The surviving tokens joined by single spaces (possibly an empty
	        string).
	    """
	    text = _NON_WORD_RE.sub(' ', str(text)).lower()
	    text = _INNER_SINGLE_LETTER_RE.sub(' ', text)
	    text = _LEADING_SINGLE_LETTER_RE.sub(' ', text)
	    text = _WHITESPACE_RUN_RE.sub(' ', text)
	    stop_words, lemmatizer = _get_text_tools()
	    return ' '.join(
	        lemmatizer.lemmatize(word)
	        for word in text.split()
	        if word not in stop_words
	    )

	# Prepare the data for prediction
	def _join_cells(row, separator):
	    """Join one row's cells into a single string.

	    Cells are converted with ``str()`` and missing cells (NaN/None, e.g.
	    blank spreadsheet cells) become empty strings, so a non-string cell no
	    longer makes ``str.join`` raise ``TypeError``.
	    """
	    return separator.join('' if pd.isna(cell) else str(cell) for cell in row)


	response_columns = ['CV/Resume'] + [f'Q{i}' for i in range(1, 37)]
	data['processed_text'] = (
	    data[response_columns]
	    .apply(_join_cells, axis=1, args=(', ',))
	    .apply(preprocess_text)
	)
	data_open['processed_text_open'] = (
	    data_open[['Demo_F', 'Question']]
	    .apply(_join_cells, axis=1, args=(' ',))
	    .apply(preprocess_text)
	)
	data_open['processed_text_mopen'] = (
	    data_open[['Demo_M', 'Question']]
	    .apply(_join_cells, axis=1, args=(' ',))
	    .apply(preprocess_text)
	)

	# Candidate labels offered to the zero-shot classifier.
	labels = ["Peacemaker", "Loyalist", "Achiever", "Reformer", "Individualist", "Helper", "Challenger", "Investigator", "Enthusiast"]
	# NOTE(review): confidence_threshold is not read by any code visible in this
	# file - confirm whether predictions are meant to be filtered by it.
	confidence_threshold = st.slider("Confidence Threshold", 0.0, 1.0, 0.5)

	@st.cache_data
	def _predict_top_labels(texts, candidate_labels):
	    """Return the highest-scoring candidate label for each text.

	    Results are memoised by Streamlit on the argument values, so reruns of
	    the script triggered by unrelated widget changes do not repeat the
	    model inference for texts that were already classified.

	    Args:
	        texts: List of preprocessed strings to classify.
	        candidate_labels: List of label names passed to the classifier.

	    Returns:
	        A list with one label per input text, in the same order.
	    """
	    return [classifier(text, candidate_labels)['labels'][0] for text in texts]


	# Automatic prediction on resume text input
	if resume_text:
	    processed_resume = preprocess_text(resume_text)
	    if processed_resume:
	        highest_score_label = _predict_top_labels([processed_resume], labels)[0]
	        st.write("Predicted Personality for the given resume:", highest_score_label)
	    else:
	        # Nothing is left after preprocessing (e.g. only punctuation or stop
	        # words), so a prediction would not reflect the resume at all.
	        st.warning("The resume text contains no usable words after preprocessing.")

	# Automatic prediction for each row in DataFrame
	# Whole-column assignment also creates the column when the frame is empty,
	# which the per-row .at writes did not.
	data['Predicted'] = _predict_top_labels(data['processed_text'].tolist(), labels)
	st.dataframe(data[['True_label', 'Predicted']])

	# Separate predictions for Female and Male questions
	data_open['Predicted_F'] = _predict_top_labels(data_open['processed_text_open'].tolist(), labels)
	data_open['Predicted_M'] = _predict_top_labels(data_open['processed_text_mopen'].tolist(), labels)
	st.dataframe(data_open[['True_label', 'Predicted_F', 'Predicted_M']])