Spaces:

Fralet
/

personality

Running

App Files Files Community

personality / app.py

Fralet

Update app.py

75a0105 verified 8 months ago

raw

history blame

2.12 kB

	import streamlit as st
	import pandas as pd
	from transformers import pipeline
	import re
	import nltk
	from nltk.corpus import stopwords
	from nltk.stem import WordNetLemmatizer

	# Download the NLTK data used by preprocess_text: the English stopword
	# list and the WordNet corpus required by WordNetLemmatizer.
	nltk.download('stopwords')
	nltk.download('wordnet')


	@st.cache_resource
	def _load_classifier():
	    """Build the zero-shot classification pipeline once and reuse it.

	    Streamlit re-executes this script from the top on every widget
	    interaction (slider move, button press). Caching the pipeline as a
	    resource keeps the model from being re-loaded on each of those reruns.
	    """
	    return pipeline("zero-shot-classification", model="Fralet/personality")


	# Initialize the zero-shot classification pipeline
	classifier = _load_classifier()

	# Streamlit interface setup
	st.title("Resume-based Personality Prediction by Serikov Ayanbek")

	# Load data from Excel
	# NOTE(review): the file name below is a placeholder; the code further down
	# reads the columns 'CV/Resume', 'Q1'..'Q36', 'MAX1'..'MAX3', 'True_label'
	# and 'Predicted' from this sheet.
	data = pd.read_excel("your_excel_file.xlsx")  # Replace 'your_excel_file.xlsx' with your actual file name

	# Preprocess text function
	def preprocess_text(text):
	    """Normalize free text into a space-joined string of lemmatized tokens.

	    Steps, in order: convert the input with ``str()``; replace every
	    non-word character with a space; lowercase; drop isolated single
	    letters a-z; collapse whitespace; remove English stopwords; lemmatize
	    each remaining token with WordNet.

	    Args:
	        text: Value to clean. Non-string values are converted with ``str()``.

	    Returns:
	        The surviving lemmatized tokens joined by single spaces, or an
	        empty string when no tokens remain.
	    """
	    text = re.sub(r'\W', ' ', str(text))
	    text = text.lower()
	    # Drop single letters surrounded by whitespace. This has to be an
	    # assignment: re.sub returns a new string, so comparing with ``is``
	    # would discard the result and leave the text unchanged.
	    text = re.sub(r'\s+[a-z]\s+', ' ', text)
	    # Drop a single letter at the very start of the text.
	    text = re.sub(r'^[a-z]\s+', ' ', text)
	    # Collapse any run of whitespace into one space.
	    text = re.sub(r'\s+', ' ', text)
	    stop_words = set(stopwords.words('english'))
	    lemmatizer = WordNetLemmatizer()
	    tokens = [
	        lemmatizer.lemmatize(word)
	        for word in text.split()
	        if word not in stop_words
	    ]
	    return ' '.join(tokens)

	# Combine relevant text columns for processing
	question_columns = [f'Q{i}' for i in range(1, 37)]  # Q1..Q36; adjust range if needed


	def _join_cells(cells):
	    """Join one row's cells into a single space-separated string.

	    Missing cells (NaN/None) are skipped and every other value is passed
	    through ``str()``. A bare ``' '.join`` raises TypeError on any
	    non-string item, which would abort the whole app on a sheet that
	    contains an empty or numeric cell.
	    """
	    return ' '.join(str(cell) for cell in cells if pd.notna(cell))


	# Row-wise concatenation of the resume text and all question answers.
	data['combined_text'] = data[['CV/Resume'] + question_columns].apply(_join_cells, axis=1)
	data['processed_text'] = data['combined_text'].apply(preprocess_text)

	# Prediction confidence threshold: slider from 0.0 to 1.0, starting at 0.5.
	confidence_threshold = st.slider("Confidence Threshold", 0.0, 1.0, 0.5)

	if st.button("Predict Personality"):
	    def get_predictions(row):
	        """Return the row's candidate labels that reach the confidence threshold.

	        The candidate labels are taken from the row's MAX1, MAX2 and MAX3
	        cells and scored against the row's ``processed_text`` by the
	        zero-shot classifier. Labels are kept in the order the classifier
	        returns them, and only those whose score is at least
	        ``confidence_threshold`` are included.
	        """
	        candidates = [row[column] for column in ('MAX1', 'MAX2', 'MAX3')]
	        outcome = classifier(row['processed_text'], candidates)
	        accepted = []
	        for label, score in zip(outcome['labels'], outcome['scores']):
	            if score >= confidence_threshold:
	                accepted.append(label)
	        return accepted

	    # Classify every row, then display the new column next to the
	    # existing 'True_label' and 'Predicted' columns.
	    data['predicted_labels'] = data.apply(get_predictions, axis=1)
	    st.dataframe(data[['True_label', 'Predicted', 'predicted_labels']])