# personality/app.py: Streamlit app for resume-based personality prediction
import streamlit as st
from transformers import pipeline
import pandas as pd
import re
import nltk
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
# Download the NLTK resources needed for stopword removal and lemmatization
nltk.download('stopwords', quiet=True)
nltk.download('wordnet', quiet=True)
nltk.download('omw-1.4', quiet=True)  # some NLTK versions also need this for the WordNet lemmatizer
# Initialize the zero-shot classification pipeline
classifier = pipeline("zero-shot-classification", model="Fralet/personality")
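# Each classifier call returns a dict of the form {'sequence': ..., 'labels': [...], 'scores': [...]},
# with labels sorted by descending score; the filtering step below relies on this shape.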
# Define the candidate labels according to the Enneagram types
default_labels = ["Peacemaker", "Loyalist", "Achiever", "Reformer", "Individualist", "Helper", "Challenger", "Investigator", "Enthusiast"]
# Streamlit interface setup
st.title("Resume-based Personality Prediction by Serikov Ayanbek")
# Load survey responses and resumes from ResponseTest.xlsx (expected alongside app.py)
data = pd.read_excel("ResponseTest.xlsx")
# Preprocess text: remove punctuation and stray single characters, drop stopwords, lemmatize
def preprocess_text(text):
    text = re.sub(r'\W', ' ', str(text))      # replace non-word characters with spaces
    text = text.lower()
    text = re.sub(r'\s+[a-z]\s+', ' ', text)  # drop single characters surrounded by whitespace
    text = re.sub(r'^[a-z]\s+', ' ', text)    # drop a single leading character
    text = re.sub(r'\s+', ' ', text)          # collapse repeated whitespace
    stop_words = set(stopwords.words('english'))
    lemmatizer = WordNetLemmatizer()
    tokens = text.split()
    tokens = [lemmatizer.lemmatize(word) for word in tokens if word not in stop_words]
    return ' '.join(tokens)
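# Example (with the standard English stopword list): preprocess_text("The Cats are Running!") -> "cat running"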
# Optional user-defined labels (comma-separated); fall back to the Enneagram types above
user_labels = st.text_input("Enter custom labels separated by comma (optional)")
labels = [label.strip() for label in user_labels.split(',') if label.strip()] if user_labels else default_labels
# Prediction confidence threshold
confidence_threshold = st.slider("Confidence Threshold", 0.0, 1.0, 0.5)
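# Note: by default the zero-shot pipeline runs single-label classification, so each row's scores
# sum to 1 across the candidate labels; with nine labels, a high threshold may leave some rows
# with no predicted label at all.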
if st.button("Predict Personality"):
    # Combine the resume text with the question-response columns Q1..Q36
    question_columns = [f'Q{i}' for i in range(1, 37)]  # adjust the range if the sheet has more or fewer question columns
    data['combined_text'] = data[['CV/Resume'] + question_columns].astype(str).agg(' '.join, axis=1)
    data['processed_text'] = data['combined_text'].apply(preprocess_text)

    # Run zero-shot classification on each processed row
    predictions = data['processed_text'].apply(lambda x: classifier(x, candidate_labels=labels))
    # Keep only the labels whose scores meet the confidence threshold
    data['predicted_labels'] = predictions.apply(
        lambda x: [label for label, score in zip(x['labels'], x['scores']) if score >= confidence_threshold]
    )
    st.dataframe(data[['True_label', 'predicted_labels']])  # 'True_label' is assumed to be a column in the Excel file
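# To run the app locally (assuming Streamlit is installed): streamlit run app.py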