Spaces:

Fralet
/

personality

Sleeping

App Files Files Community

personality / app.py

Fralet

Update app.py

324d859 verified 8 months ago

raw

history blame

2.5 kB

	import streamlit as st
	import pandas as pd
	from transformers import pipeline
	import re
	import nltk
	from nltk.corpus import stopwords
	from nltk.stem import WordNetLemmatizer

	# Fetch the NLTK corpora needed by the text clean-up step:
	# the stop-word lists and the WordNet data used for lemmatization.
	nltk.download('stopwords')
	nltk.download('wordnet')

	# Build the zero-shot classification pipeline once at start-up.
	classifier = pipeline(
	    "zero-shot-classification",
	    model="Fralet/personality",
	)

	# Candidate labels used when the user does not supply custom ones.
	default_labels = [
	    "Peacemaker",
	    "Loyalist",
	    "Achiever",
	    "Reformer",
	    "Individualist",
	    "Helper",
	    "Challenger",
	    "Investigator",
	    "Enthusiast",
	]

	# Page heading
	st.title("Resume-based Personality Prediction by Serikov Ayanbek")

	# Read the input spreadsheet; adjust the file path/name as necessary.
	data = pd.read_excel("your_excel_file.xlsx")

	# Preprocess text function
	def preprocess_text(text):
	    """Normalize raw text before it is passed to the classifier.

	    The value is stringified, every non-word character is replaced by a
	    space, the text is lower-cased, single letters that stand between
	    whitespace (or at the very start) are removed, whitespace runs are
	    collapsed, English stop words are dropped and the remaining tokens are
	    lemmatized with WordNet.

	    Requires the NLTK 'stopwords' and 'wordnet' corpora to be available.

	    Args:
	        text: Value to clean. It is passed through ``str()`` first, so
	            non-string values are accepted.

	    Returns:
	        The cleaned tokens joined by single spaces.
	    """
	    text = re.sub(r'\W', ' ', str(text))
	    text = text.lower()
	    # Drop isolated single letters in the middle and at the start of the text.
	    # Matches cannot overlap, so in a run of consecutive single letters
	    # ("a b c") some of them survive this pass.
	    text = re.sub(r'\s+[a-z]\s+', ' ', text)
	    text = re.sub(r'^[a-z]\s+', ' ', text)
	    # Collapse whitespace runs left behind by the substitutions above.
	    # This must be an assignment: an `is` comparison here would compute the
	    # substitution and silently throw the result away.
	    text = re.sub(r'\s+', ' ', text)
	    stop_words = set(stopwords.words('english'))
	    lemmatizer = WordNetLemmatizer()
	    tokens = text.split()
	    tokens = [lemmatizer.lemmatize(word) for word in tokens if word not in stop_words]
	    return ' '.join(tokens)

	# Combine the resume text with the answers Q1..Q36 into one string per row.
	# Each cell is converted to str and missing values become '' so that
	# ' '.join does not raise TypeError on NaN or numeric cells.
	text_columns = ['CV/Resume'] + [f'Q{i}' for i in range(1, 37)]
	data['combined_text'] = data[text_columns].agg(
	    lambda row: ' '.join('' if pd.isna(value) else str(value) for value in row),
	    axis=1,
	)
	data['processed_text'] = data['combined_text'].apply(preprocess_text)

	# Streamlit user inputs
	user_labels = st.text_input("Enter custom labels separated by comma (optional)")
	# Trim surrounding whitespace and drop empty entries so input such as
	# "Helper, Achiever," does not produce the labels " Achiever" and "".
	# Fall back to the defaults when nothing usable was entered.
	custom_labels = [label.strip() for label in user_labels.split(',') if label.strip()]
	labels = custom_labels or default_labels
	confidence_threshold = st.slider("Confidence Threshold", 0.0, 1.0, 0.5)

	if st.button("Predict Personality"):

	    def _classify(processed):
	        """Run the zero-shot classifier on one cleaned text with the active labels."""
	        return classifier(processed, labels)

	    def _confident_labels(result):
	        """Map each label whose score reaches the threshold to a percentage string."""
	        kept = {}
	        for label, score in zip(result['labels'], result['scores']):
	            if score >= confidence_threshold:
	                kept[label] = f"{score*100:.2f}%"
	        return kept

	    def _max_labels(row):
	        """Collect the row's MAX1..MAX3 values into a list."""
	        return [row['MAX1'], row['MAX2'], row['MAX3']]

	    # Classify every row, then keep only the labels at or above the
	    # confidence threshold and gather the MAX labels for comparison.
	    data['predictions'] = data['processed_text'].apply(_classify)
	    data['predicted_labels'] = data['predictions'].apply(_confident_labels)
	    data['MAX_labels'] = data.apply(_max_labels, axis=1)

	    st.write("Predictions and Labels:")
	    shown_columns = ['True_label', 'Predicted', 'predicted_labels', 'MAX_labels']
	    st.dataframe(data[shown_columns])

	# Always render the full DataFrame, whether or not the button was pressed.
	st.dataframe(data)