"""import streamlit as st import pandas as pd from transformers import pipeline import re import nltk from nltk.corpus import stopwords from nltk.stem import WordNetLemmatizer import logging # Setup logging logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s') # Download necessary NLTK resources nltk.download('stopwords') nltk.download('wordnet') # Initialize the zero-shot classification pipeline classifier = pipeline("zero-shot-classification", model="facebook/bart-large-mnli") # Streamlit interface setup st.title("Resume-based Personality Prediction by Serikov Ayanbek") resume_text = st.text_area("Enter Resume Text Here", height=300) # Load data from Excel data = pd.read_excel("ResponseTest.xlsx") data_open = pd.read_excel("ResponseOpen.xlsx") # Define preprocessing function def preprocess_text(text): text = re.sub(r'\W', ' ', str(text)) text = text.lower() text = re.sub(r'\s+[a-z]\s+', ' ', text) text = re.sub(r'^[a-z]\s+', ' ', text) text = re.sub(r'\s+', ' ', text) stop_words = set(stopwords.words('english')) lemmatizer = WordNetLemmatizer() tokens = text.split() tokens = [lemmatizer.lemmatize(word) for word in tokens if word not in stop_words] return ' '.join(tokens) # Prepare the data for prediction data['processed_text'] = data[['CV/Resume'] + [f'Q{i}' for i in range(1, 37)]].agg(lambda x: ', '.join(x), axis=1).apply(preprocess_text) data_open['processed_text_open'] = data_open[['Demo_F', 'Question']].agg(' '.join, axis=1).apply(preprocess_text) data_open['processed_text_mopen'] = data_open[['Demo_M', 'Question']].agg(' '.join, axis=1).apply(preprocess_text) labels = ["Peacemaker", "Loyalist", "Achiever", "Reformer", "Individualist", "Helper", "Challenger", "Investigator", "Enthusiast"] # Function to predict personality and log the predictions def predict_and_log(data, prediction_column, process_text_column, true_label_column=None, custom_labels=None): for index, row in data.iterrows(): processed_text = row[process_text_column] if custom_labels: result = classifier(processed_text, [row[label] for label in custom_labels]) else: result = classifier(processed_text, labels) highest_score_label = result['labels'][0] data.at[index, prediction_column] = highest_score_label true_label = row[true_label_column] if true_label_column else 'Not available' data_id = row['id'] logging.info(f"Row {data_id}: True Label - {true_label}, {prediction_column} - {highest_score_label}") # Predict and log results for each DataFrame # predict_and_log(data, 'Predicted', 'processed_text', true_label_column='True_label', custom_labels=['MAX1', 'MAX2', 'MAX3']) predict_and_log(data_open, 'Predicted_F', 'processed_text_open', true_label_column='True_label') predict_and_log(data_open, 'Predicted_M', 'processed_text_mopen', true_label_column='True_label') # Optionally display a confirmation message st.write("Predictions have been logged. Check your logs for details.") """ import streamlit as st import pandas as pd import seaborn as sns import matplotlib.pyplot as plt from sklearn.metrics import accuracy_score, precision_recall_fscore_support, confusion_matrix # Load data data = pd.read_excel("ResponseOpenPredicted.xlsx") st.title("Resume-based Personality Prediction by Serikov Ayanbek") # Function to calculate metrics def calculate_metrics(true_labels, predicted_labels): accuracy = accuracy_score(true_labels, predicted_labels) precision, recall, f1_score, _ = precision_recall_fscore_support(true_labels, predicted_labels, average='weighted') return accuracy, precision, recall, f1_score # Metrics Calculation accuracy_f, precision_f, recall_f, f1_score_f = calculate_metrics(data['True_label'], data['Predicted_F']) accuracy_m, precision_m, recall_m, f1_score_m = calculate_metrics(data['True_label'], data['Predicted_M']) # Plotting function for confusion matrices def plot_confusion_matrix(conf_matrix, title): fig, ax = plt.subplots() sns.heatmap(conf_matrix, annot=True, fmt="d", cmap="Blues", ax=ax) plt.title(title) plt.xlabel('Predicted Labels') plt.ylabel('True Labels') st.pyplot(fig) # Plotting function for distribution of predictions def plot_predictions_distribution(data, column, title): fig, ax = plt.subplots() sns.countplot(x=column, data=data, palette="viridis") plt.title(title) plt.xlabel('Predicted Labels') plt.ylabel('Count') st.pyplot(fig) # Streamlit app structure st.title('Model Performance Evaluation') st.subheader('Performance Metrics') st.write(f"Accuracy for Predicted_F: {accuracy_f:.2%}") st.write(f"Precision for Predicted_F: {precision_f:.2%}") st.write(f"Recall for Predicted_F: {recall_f:.2%}") st.write(f"F1-Score for Predicted_F: {f1_score_f:.2%}") st.write(f"Accuracy for Predicted_M: {accuracy_m:.2%}") st.write(f"Precision for Predicted_M: {precision_m:.2%}") st.write(f"Recall for Predicted_M: {recall_m:.2%}") st.write(f"F1-Score for Predicted_M: {f1_score_m:.2%}") st.subheader('Confusion Matrices') conf_matrix_f = confusion_matrix(data['True_label'], data['Predicted_F']) conf_matrix_m = confusion_matrix(data['True_label'], data['Predicted_M']) plot_confusion_matrix(conf_matrix_f, 'Confusion Matrix for Predicted_F') plot_confusion_matrix(conf_matrix_m, 'Confusion Matrix for Predicted_M') st.subheader('Distribution of Prediction Results') st.write("Distribution for Predicted_F") plot_predictions_distribution(data, 'Predicted_F', 'Distribution of Predictions for Female Demographic') st.write("Distribution for Predicted_M") plot_predictions_distribution(data, 'Predicted_M', 'Distribution of Predictions for Male Demographic')