Spaces:
Sleeping
Sleeping
File size: 9,745 Bytes
88a3f04 5cdd4a1 895141b 324d859 2ee3ecc c05213f 2ee3ecc 8853706 76aff4b 2ee3ecc 1b50b66 e27efab 7711d36 1b50b66 76aff4b b4628ad d605d91 76aff4b bb3c6bc 76aff4b bb3c6bc 76aff4b a1a24b4 76aff4b 75a0105 76aff4b 2ee3ecc bb3c6bc eee7f0b d605d91 6297210 48b2405 8853706 c09d452 8853706 c09d452 402111a 5b10278 8853706 ed8e17f c09d452 d605d91 c08559d 8853706 eb86ee3 a278b80 ceeef94 5f38853 b96c15e 5f38853 ceeef94 a278b80 ceeef94 5f38853 ceeef94 5f38853 ceeef94 b96c15e 5f38853 b96c15e 5f38853 b96c15e 5f38853 b96c15e 5f38853 b96c15e 5f38853 b96c15e 5f38853 b96c15e 5f38853 5cdd4a1 245e663 eb86ee3 245e663 2992308 245e663 2992308 245e663 2992308 245e663 2992308 245e663 2992308 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 |
"""
import streamlit as st
import pandas as pd
from transformers import pipeline
import re
import nltk
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
import logging
# Setup logging: INFO level, records formatted as "<time> - <level> - <message>".
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
# Download necessary NLTK resources: the stop-word corpus and WordNet,
# which back stopwords.words() and WordNetLemmatizer in preprocess_text.
nltk.download('stopwords')
nltk.download('wordnet')
# Initialize the zero-shot classification pipeline (facebook/bart-large-mnli).
# NOTE(review): this runs at module level, so presumably the model is rebuilt
# on every Streamlit rerun - confirm, and consider caching the pipeline.
classifier = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")
# Streamlit interface setup
st.title("Resume-based Personality Prediction by Serikov Ayanbek")
# NOTE(review): resume_text does not appear to be read by the batch
# prediction code that follows, which classifies spreadsheet rows only.
resume_text = st.text_area("Enter Resume Text Here", height=300)
# Load data from Excel; both paths are relative to the working directory.
data = pd.read_excel("ResponseTest.xlsx")
data_open = pd.read_excel("ResponseOpen.xlsx")
# Define preprocessing function
def preprocess_text(text):
    # Normalise free text into a space-separated string of lemmatised,
    # non-stop-word tokens. `text` may be any object; it is passed through
    # str() first. Returns the cleaned string (possibly empty).
    cleaned = re.sub(r'\W', ' ', str(text)).lower()
    # Drop isolated single letters: first those surrounded by whitespace,
    # then one sitting at the very start of the text.
    for single_letter_pattern in (r'\s+[a-z]\s+', r'^[a-z]\s+'):
        cleaned = re.sub(single_letter_pattern, ' ', cleaned)
    # Collapse every run of whitespace into a single space.
    cleaned = re.sub(r'\s+', ' ', cleaned)
    stop_words = set(stopwords.words('english'))
    lemmatizer = WordNetLemmatizer()
    kept = []
    for word in cleaned.split():
        if word in stop_words:
            continue
        kept.append(lemmatizer.lemmatize(word))
    return ' '.join(kept)
# Prepare the data for prediction.
# Every cell is coerced to text before joining (missing values become empty
# strings): str.join raises TypeError on NaN or numeric cells, which would
# abort the whole aggregation on the first incomplete row.
answer_columns = ['CV/Resume'] + [f'Q{i}' for i in range(1, 37)]
data['processed_text'] = (
    data[answer_columns].fillna('').astype(str).agg(', '.join, axis=1).apply(preprocess_text)
)
# The open-question sheet gets two variants of each row: the question text
# prefixed with the Demo_F column and with the Demo_M column.
open_text = data_open[['Demo_F', 'Demo_M', 'Question']].fillna('').astype(str)
data_open['processed_text_open'] = open_text[['Demo_F', 'Question']].agg(' '.join, axis=1).apply(preprocess_text)
data_open['processed_text_mopen'] = open_text[['Demo_M', 'Question']].agg(' '.join, axis=1).apply(preprocess_text)
# Default candidate labels handed to the zero-shot classifier.
labels = ["Peacemaker", "Loyalist", "Achiever", "Reformer", "Individualist", "Helper", "Challenger", "Investigator", "Enthusiast"]
# Function to predict personality and log the predictions
def predict_and_log(data, prediction_column, process_text_column, true_label_column=None, custom_labels=None):
    # Classify every row of `data` and record the winning label.
    #
    # data                -- DataFrame to classify; modified in place.
    # prediction_column   -- column that receives the predicted label.
    # process_text_column -- column holding the text to classify.
    # true_label_column   -- optional column whose value is echoed in the log.
    # custom_labels       -- optional column names; when given, the candidate
    #                        labels for a row are that row's values in those
    #                        columns instead of the module-level `labels`.
    #
    # Relies on the module-level `classifier` and `labels`, and on an 'id'
    # column used to identify the row in the log line. Returns None.
    for index, row in data.iterrows():
        processed_text = row[process_text_column]
        candidate_labels = [row[name] for name in custom_labels] if custom_labels else labels
        result = classifier(processed_text, candidate_labels)
        # The first entry of result['labels'] is taken as the top prediction.
        highest_score_label = result['labels'][0]
        data.at[index, prediction_column] = highest_score_label
        if true_label_column:
            true_label = row[true_label_column]
        else:
            true_label = 'Not available'
        data_id = row['id']
        logging.info(f"Row {data_id}: True Label - {true_label}, {prediction_column} - {highest_score_label}")
# Predict and log results for each DataFrame.
# The closed-question run below is currently disabled:
# predict_and_log(data, 'Predicted', 'processed_text', true_label_column='True_label', custom_labels=['MAX1', 'MAX2', 'MAX3'])
open_question_runs = (
    ('Predicted_F', 'processed_text_open'),
    ('Predicted_M', 'processed_text_mopen'),
)
for prediction_column, text_column in open_question_runs:
    predict_and_log(data_open, prediction_column, text_column, true_label_column='True_label')
# Tell the user where to find the results once both passes have finished.
st.write("Predictions have been logged. Check your logs for details.")
import streamlit as st
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score, precision_recall_fscore_support, confusion_matrix
from sklearn.preprocessing import LabelEncoder
# Load data: the workbook is read from a path relative to the working directory.
# The evaluation code below reads its 'True_label', 'Predicted_F' and
# 'Predicted_M' columns.
data = pd.read_excel("ResponseOpenPredicted.xlsx")
st.title("Resume-based Personality Prediction by Serikov Ayanbek")
# Function to calculate metrics
def calculate_metrics(true_labels, predicted_labels):
    # Return (accuracy, precision, recall, f1) for the two label sequences.
    # Precision, recall and F1 use scikit-learn's 'weighted' averaging; the
    # support value that scikit-learn also returns is discarded.
    accuracy = accuracy_score(true_labels, predicted_labels)
    weighted_scores = precision_recall_fscore_support(true_labels, predicted_labels, average='weighted')
    return (accuracy, *weighted_scores[:3])
# Metrics Calculation
accuracy_f, precision_f, recall_f, f1_score_f = calculate_metrics(data['True_label'], data['Predicted_F'])
accuracy_m, precision_m, recall_m, f1_score_m = calculate_metrics(data['True_label'], data['Predicted_M'])
# Encode labels for better visualization.
# The encoder is fitted on every label that occurs in any of the three
# columns: fitting on True_label alone makes transform() raise ValueError as
# soon as the model predicts a label that never appears as a true label.
le = LabelEncoder()
le.fit(pd.concat([data['True_label'], data['Predicted_F'], data['Predicted_M']], ignore_index=True))
data['True_label_encoded'] = le.transform(data['True_label'])
data['Predicted_F_encoded'] = le.transform(data['Predicted_F'])
data['Predicted_M_encoded'] = le.transform(data['Predicted_M'])
# Plotting function for confusion matrices
def plot_confusion_matrix(true_labels, predicted_labels, title):
    # Render a confusion-matrix heatmap in the Streamlit page.
    #
    # true_labels / predicted_labels -- label sequences encoded with the
    #                                   module-level encoder `le`.
    # title                          -- heading drawn above the heatmap.
    #
    # The matrix is built over the full class list so that its shape always
    # matches the le.classes_ tick labels, even when some class is absent
    # from both inputs.
    class_ids = list(range(len(le.classes_)))
    conf_matrix = confusion_matrix(true_labels, predicted_labels, labels=class_ids)
    fig, ax = plt.subplots()
    sns.heatmap(conf_matrix, annot=True, fmt="d", cmap="Blues", ax=ax,
                xticklabels=le.classes_, yticklabels=le.classes_)
    ax.set_title(title)
    ax.set_xlabel('Predicted Labels')
    ax.set_ylabel('True Labels')
    st.pyplot(fig)
    # pyplot keeps every figure alive until it is closed; release this one
    # so that repeated script runs do not accumulate figures.
    plt.close(fig)
# Plotting function for distribution of predictions
def plot_predictions_distribution(data, column, title):
    # Render a bar chart of how often each class occurs in data[column].
    #
    # data   -- DataFrame holding the encoded predictions.
    # column -- name of a column encoded with the module-level encoder `le`.
    # title  -- heading drawn above the chart.
    #
    # `order` forces one bar slot per known class. Without it countplot only
    # draws the classes that actually occur, and relabelling the ticks with
    # the full le.classes_ list then mislabels the bars or fails on a
    # tick/label count mismatch.
    class_ids = list(range(len(le.classes_)))
    fig, ax = plt.subplots()
    sns.countplot(x=column, data=data, palette="viridis", order=class_ids, ax=ax)
    ax.set_title(title)
    ax.set_xlabel('Predicted Labels')
    ax.set_ylabel('Count')
    ax.set_xticklabels(le.classes_, rotation=45)
    # Leave room at the bottom for the rotated class names.
    fig.subplots_adjust(bottom=0.2)
    st.pyplot(fig)
    # pyplot keeps every figure alive until it is closed; release this one
    # so that repeated script runs do not accumulate figures.
    plt.close(fig)
# Streamlit app structure: headline metrics first, then the confusion
# matrices, then the distribution of predicted classes.
st.title('Model Performance Evaluation')
st.subheader('Performance Metrics')
# One line per metric, formatted as a percentage with two decimals.
metric_lines = [
    f"Accuracy for Predicted_F: {accuracy_f:.2%}",
    f"Precision for Predicted_F: {precision_f:.2%}",
    f"Recall for Predicted_F: {recall_f:.2%}",
    f"F1-Score for Predicted_F: {f1_score_f:.2%}",
    f"Accuracy for Predicted_M: {accuracy_m:.2%}",
    f"Precision for Predicted_M: {precision_m:.2%}",
    f"Recall for Predicted_M: {recall_m:.2%}",
    f"F1-Score for Predicted_M: {f1_score_m:.2%}",
]
for metric_line in metric_lines:
    st.write(metric_line)
st.subheader('Confusion Matrices')
confusion_plots = (
    ('Predicted_F_encoded', 'Confusion Matrix for Predicted_F'),
    ('Predicted_M_encoded', 'Confusion Matrix for Predicted_M'),
)
for encoded_column, plot_title in confusion_plots:
    plot_confusion_matrix(data['True_label_encoded'], data[encoded_column], plot_title)
st.subheader('Distribution of Prediction Results')
distribution_plots = (
    ("Distribution for Predicted_F", 'Predicted_F_encoded', 'Distribution of Predictions for Female Demographic'),
    ("Distribution for Predicted_M", 'Predicted_M_encoded', 'Distribution of Predictions for Male Demographic'),
)
for caption, encoded_column, plot_title in distribution_plots:
    st.write(caption)
    plot_predictions_distribution(data, encoded_column, plot_title)
import streamlit as st
from transformers import pipeline
import re
import nltk
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
# Download the NLTK resources used by preprocess_text (stop words, WordNet).
nltk.download('stopwords')
nltk.download('wordnet')
# Initialize the zero-shot classification pipeline
classifier = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")
# Define the candidate labels according to the Enneagram types
default_labels = ["Peacemaker", "Loyalist", "Achiever", "Reformer", "Individualist", "Helper", "Challenger", "Investigator", "Enthusiast"]
# Streamlit interface
st.title("Resume-based Personality Prediction")
resume_text = st.text_area("Enter Resume Text Here", height=300)
# User-defined labels option.
# Each comma-separated entry is trimmed and empty entries are dropped, so
# input such as "Leader, Thinker," yields ["Leader", "Thinker"] rather than
# labels with stray spaces or an empty label. If nothing usable remains,
# the default label list is used.
user_labels = st.text_input("Enter custom labels separated by comma (optional)")
custom_labels = [entry.strip() for entry in user_labels.split(',') if entry.strip()]
labels = custom_labels or default_labels
# Prediction confidence threshold: slider from 0.0 to 1.0, initially 0.5.
confidence_threshold = st.slider("Confidence Threshold", 0.0, 1.0, 0.5)
if st.button("Predict Personality"):
    # Text Preprocessing
    def preprocess_text(text):
        # Lower-case the text, replace non-word characters with spaces, drop
        # isolated single letters, collapse whitespace, remove English stop
        # words and lemmatise what is left. Returns the cleaned string.
        text = re.sub(r'\W', ' ', str(text))
        text = text.lower()
        text = re.sub(r'\s+[a-z]\s+', ' ', text)
        text = re.sub(r'^[a-z]\s+', ' ', text)
        text = re.sub(r'\s+', ' ', text)
        stop_words = set(stopwords.words('english'))
        lemmatizer = WordNetLemmatizer()
        tokens = text.split()
        tokens = [lemmatizer.lemmatize(word) for word in tokens if word not in stop_words]
        return ' '.join(tokens)

    processed_text = preprocess_text(resume_text)
    if not processed_text:
        # Nothing is left to classify when the box is empty or holds only
        # punctuation and stop words; the zero-shot pipeline is not expected
        # to accept an empty sequence, so report it instead of calling it.
        st.warning("Please enter some resume text before requesting a prediction.")
    else:
        # Make prediction
        result = classifier(processed_text, labels)
        # Display the results: only labels scoring at or above the threshold.
        st.write("Predictions (above confidence threshold):")
        displayed = False
        for label, score in zip(result['labels'], result['scores']):
            if score >= confidence_threshold:
                st.write(f"{label}: {score*100:.2f}%")
                displayed = True
        if not displayed:
            st.write("No predictions exceed the confidence threshold.")
"""
from transformers import AutoModelForSequenceClassification, AutoTokenizer
import torch
# Check if CUDA is available, otherwise use CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Load the model and tokenizer; the model is moved to the selected device.
nli_model = AutoModelForSequenceClassification.from_pretrained('facebook/bart-large-mnli').to(device)
tokenizer = AutoTokenizer.from_pretrained('facebook/bart-large-mnli')
# The premise is the text being classified; the hypothesis states the
# candidate label as a sentence.
premise = 'A few years ago, I was juggling a demanding job, volunteer commitments, and personal relationships, all while trying to manage chronic health issues. The challenge was overwhelming at times, but I approached it by prioritizing open communication with my employer and loved ones about my limits. I learned to delegate and accept help, which was difficult for me as I usually prefer to keep the peace by handling things myself. This experience taught me the importance of setting boundaries and the strength in vulnerability.'
hypothesis = 'This example is Helper.'
# Tokenize the input text pair. `truncation='only_first'` shortens only the
# premise when the pair is too long, keeping the hypothesis intact; it
# replaces the deprecated `truncation_strategy` keyword.
inputs = tokenizer.encode(premise, hypothesis, return_tensors='pt', truncation='only_first').to(device)
# Perform inference. No gradients are needed, so skip building the autograd
# graph (the printed tensor therefore carries no grad_fn).
with torch.no_grad():
    logits = nli_model(inputs)[0]
# Process logits to get probabilities: keep columns 0 and 2 and softmax over
# that pair only. Per the facebook/bart-large-mnli model card these are the
# contradiction and entailment logits (column 1, neutral, is discarded).
entail_contradiction_logits = logits[:, [0, 2]]
probs = entail_contradiction_logits.softmax(dim=1)
# Second column of the pair, i.e. the entailment share.
prob_label_is_true = probs[:, 1]
# Print the probability that the label is true
print(prob_label_is_true)