personality / app.py
Fralet's picture
Update app.py
75a0105 verified
raw
history blame
2.12 kB
import streamlit as st
import pandas as pd
from transformers import pipeline
import re
import nltk
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
# Fetch the NLTK corpora that the text preprocessing step relies on
for resource in ('stopwords', 'wordnet'):
    nltk.download(resource)

# Build the zero-shot classification pipeline backed by the "Fralet/personality" model
classifier = pipeline(
    "zero-shot-classification",
    model="Fralet/personality",
)

# Page heading for the Streamlit app
st.title("Resume-based Personality Prediction by Serikov Ayanbek")

# Read the input workbook; the file name below is a placeholder that must be
# replaced with the actual Excel file
data = pd.read_excel("your_excel_file.xlsx")
# Preprocess text function
def preprocess_text(text):
    """Normalize free text into a space-joined string of lemmatized tokens.

    The input is coerced with ``str()``, non-word characters are replaced by
    spaces, the text is lower-cased, isolated single ASCII letters are
    dropped, whitespace runs are collapsed, English stop words are removed
    and the remaining tokens are lemmatized with WordNet.

    Args:
        text: Value to clean; any object is accepted because it is passed
            through ``str()`` first.

    Returns:
        The cleaned tokens joined by single spaces (an empty string when no
        token survives).
    """
    text = re.sub(r'\W', ' ', str(text))
    text = text.lower()
    # The substitution result must be assigned back: a bare `is` comparison
    # here would be evaluated and discarded, leaving single letters in place.
    text = re.sub(r'\s+[a-z]\s+', ' ', text)
    # Same removal for a single letter at the very start of the text.
    text = re.sub(r'^[a-z]\s+', ' ', text)
    text = re.sub(r'\s+', ' ', text)
    stop_words = set(stopwords.words('english'))
    lemmatizer = WordNetLemmatizer()
    tokens = [
        lemmatizer.lemmatize(word)
        for word in text.split()
        if word not in stop_words
    ]
    return ' '.join(tokens)
# Combine relevant text columns for processing
question_columns = [f'Q{i}' for i in range(1, 37)]  # Q1..Q36; adjust range if needed
text_columns = ['CV/Resume'] + question_columns
# str.join only accepts strings, so blank cells (loaded as NaN) and numeric
# answers would raise TypeError. Blank cells become '' and everything else is
# cast to str before the row is joined with single spaces.
data['combined_text'] = (
    data[text_columns].fillna('').astype(str).agg(' '.join, axis=1)
)
# Clean every combined row with preprocess_text for the classifier input
data['processed_text'] = data['combined_text'].apply(preprocess_text)
# Minimum score a candidate label must reach to be reported
# (slider from 0.0 to 1.0, initial value 0.5)
confidence_threshold = st.slider("Confidence Threshold", 0.0, 1.0, 0.5)

if st.button("Predict Personality"):

    def get_predictions(row):
        """Return the row's candidate labels whose score meets the threshold.

        The candidate labels are read from the row's MAX1, MAX2 and MAX3
        columns and passed to the classifier together with the row's
        processed text. Labels and scores from the result are paired
        positionally; only labels scoring at least ``confidence_threshold``
        are kept.
        """
        candidate_labels = [row[column] for column in ('MAX1', 'MAX2', 'MAX3')]
        outcome = classifier(row['processed_text'], candidate_labels)
        accepted = []
        for label, score in zip(outcome['labels'], outcome['scores']):
            if score >= confidence_threshold:
                accepted.append(label)
        return accepted

    # Classify every row, then show the predictions next to the reference columns
    data['predicted_labels'] = data.apply(get_predictions, axis=1)
    columns_to_show = ['True_label', 'Predicted', 'predicted_labels']
    st.dataframe(data[columns_to_show])