Fralet committed on
Commit
76aff4b
1 Parent(s): 102761a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +30 -27
app.py CHANGED
@@ -1,9 +1,12 @@
1
  import streamlit as st
 
2
  from transformers import pipeline
3
  import re
4
  import nltk
5
  from nltk.corpus import stopwords
6
  from nltk.stem import WordNetLemmatizer
 
 
7
  nltk.download('stopwords')
8
  nltk.download('wordnet')
9
 
@@ -13,9 +16,24 @@ classifier = pipeline("zero-shot-classification", model="Fralet/personality")
13
  # Define the candidate labels according to the Enneagram types
14
  default_labels = ["Peacemaker", "Loyalist", "Achiever", "Reformer", "Individualist", "Helper", "Challenger", "Investigator", "Enthusiast"]
15
 
16
- # Streamlit interface
17
  st.title("Resume-based Personality Prediction by Serikov Ayanbek")
18
- resume_text = st.text_area("Enter Resume Text Here", height=300)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
 
20
  # User-defined labels option
21
  user_labels = st.text_input("Enter custom labels separated by comma (optional)")
@@ -25,30 +43,15 @@ labels = user_labels.split(',') if user_labels else default_labels
25
  confidence_threshold = st.slider("Confidence Threshold", 0.0, 1.0, 0.5)
26
 
27
  if st.button("Predict Personality"):
28
- # Text Preprocessing
29
- def preprocess_text(text):
30
- text = re.sub(r'\W', ' ', str(text))
31
- text = text.lower()
32
- text = re.sub(r'\s+[a-z]\s+', ' ', text)
33
- text = re.sub(r'^[a-z]\s+', ' ', text)
34
- text = re.sub(r'\s+', ' ', text)
35
- stop_words = set(stopwords.words('english'))
36
- lemmatizer = WordNetLemmatizer()
37
- tokens = text.split()
38
- tokens = [lemmatizer.lemmatize(word) for word in tokens if word not in stop_words]
39
- return ' '.join(tokens)
40
-
41
- processed_text = preprocess_text(resume_text)
42
 
43
- # Make prediction
44
- result = classifier(processed_text, labels)
45
 
46
- # Display the results
47
- st.write("Predictions (above confidence threshold):")
48
- displayed = False
49
- for label, score in zip(result['labels'], result['scores']):
50
- if score >= confidence_threshold:
51
- st.write(f"{label}: {score*100:.2f}%")
52
- displayed = True
53
- if not displayed:
54
- st.write("No predictions exceed the confidence threshold.")
 
1
  import streamlit as st
2
+ import pandas as pd
3
  from transformers import pipeline
4
  import re
5
  import nltk
6
  from nltk.corpus import stopwords
7
  from nltk.stem import WordNetLemmatizer
8
+
9
+ # Download necessary NLTK resources
10
  nltk.download('stopwords')
11
  nltk.download('wordnet')
12
 
 
16
# Define the candidate labels according to the Enneagram types
default_labels = ["Peacemaker", "Loyalist", "Achiever", "Reformer", "Individualist", "Helper", "Challenger", "Investigator", "Enthusiast"]

# Streamlit interface setup
st.title("Resume-based Personality Prediction by Serikov Ayanbek")

# Load survey responses from Excel.
# NOTE(review): reading a legacy .xls file requires the optional `xlrd` engine —
# confirm it is installed in the deployment environment.
data = pd.read_excel("ResponseTest.xls")
24
+
25
def preprocess_text(text):
    """Normalize raw text for zero-shot classification.

    Strips non-word characters, lowercases, removes stray single letters,
    collapses whitespace, then drops English stopwords and lemmatizes the
    remaining tokens. Returns the cleaned tokens re-joined as one string.
    """
    cleaned = re.sub(r'\W', ' ', str(text)).lower()
    # Remove isolated single letters left over from punctuation stripping.
    cleaned = re.sub(r'\s+[a-z]\s+', ' ', cleaned)
    cleaned = re.sub(r'^[a-z]\s+', ' ', cleaned)
    cleaned = re.sub(r'\s+', ' ', cleaned)

    stop_words = set(stopwords.words('english'))
    lemmatizer = WordNetLemmatizer()
    kept = [lemmatizer.lemmatize(tok) for tok in cleaned.split() if tok not in stop_words]
    return ' '.join(kept)
37
 
38
# User-defined labels option: comma-separated custom labels override the defaults.
user_labels = st.text_input("Enter custom labels separated by comma (optional)")
# Strip whitespace and drop empty entries so input like " a, b ," parses cleanly
# instead of producing labels such as " b" or "".
labels = [lbl.strip() for lbl in user_labels.split(',') if lbl.strip()] if user_labels else default_labels

# Minimum score a label must reach to be included in the output.
confidence_threshold = st.slider("Confidence Threshold", 0.0, 1.0, 0.5)

if st.button("Predict Personality"):
    # Combine the resume text with all questionnaire answers into one string per row.
    question_columns = [f'Q{i}' for i in range(1, 37)]  # Adjust range if there are more or fewer question columns
    # astype(str) guards against NaN/numeric cells, which would make ' '.join raise TypeError.
    data['combined_text'] = data[['CV/Resume'] + question_columns].astype(str).agg(' '.join, axis=1)
    data['processed_text'] = data['combined_text'].apply(preprocess_text)

    # Run zero-shot classification on every row (one pipeline call per row).
    predictions = data['processed_text'].apply(lambda x: classifier(x, labels))

    # Keep only the labels whose score clears the user-selected threshold.
    data['predicted_labels'] = predictions.apply(
        lambda x: [label for label, score in zip(x['labels'], x['scores']) if score >= confidence_threshold]
    )
    # NOTE(review): 'True_label' and 'Predicted' are assumed to be columns already
    # present in the Excel sheet — confirm, otherwise this raises KeyError.
    st.dataframe(data[['True_label', 'Predicted', 'predicted_labels']])