krishaamer committed on
Commit
be58b51
·
1 Parent(s): d5c2d7f

Add personas

Browse files
Files changed (2) hide show
  1. app.py +5 -0
  2. page_personas.py +47 -0
app.py CHANGED
@@ -4,6 +4,7 @@ import page_attitudes
4
  import page_demographics
5
  import page_shopping
6
  import page_investing
 
7
  import page_tests
8
  from urllib.parse import quote, unquote
9
  from datasets import load_dataset
@@ -39,6 +40,8 @@ if st.sidebar.button("Shopping"):
39
  st.session_state['page'] = 'Shopping'
40
  if st.sidebar.button("Investing"):
41
  st.session_state['page'] = 'Investing'
 
 
42
  if st.sidebar.button("Tests"):
43
  st.session_state['page'] = 'Tests'
44
 
@@ -58,6 +61,8 @@ elif st.session_state['page'] == 'Shopping':
58
  page_shopping.show(df)
59
  elif st.session_state['page'] == 'Investing':
60
  page_investing.show(df)
 
 
61
  elif st.session_state['page'] == 'Tests':
62
  page_tests.show(df)
63
 
 
4
  import page_demographics
5
  import page_shopping
6
  import page_investing
7
+ import page_personas
8
  import page_tests
9
  from urllib.parse import quote, unquote
10
  from datasets import load_dataset
 
40
  st.session_state['page'] = 'Shopping'
41
  if st.sidebar.button("Investing"):
42
  st.session_state['page'] = 'Investing'
43
+ if st.sidebar.button("Personas"):
44
+ st.session_state['page'] = 'Personas'
45
  if st.sidebar.button("Tests"):
46
  st.session_state['page'] = 'Tests'
47
 
 
61
  page_shopping.show(df)
62
  elif st.session_state['page'] == 'Investing':
63
  page_investing.show(df)
64
+ elif st.session_state['page'] == 'Personas':
65
+ page_personas.show(df)
66
  elif st.session_state['page'] == 'Tests':
67
  page_tests.show(df)
68
 
page_personas.py ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from sklearn.cluster import KMeans
2
+ from scipy.stats import chisquare
3
+ import streamlit as st
4
+ import pandas as pd
5
+
6
+
7
@st.cache_data
def show(df):
    """Render the "Personas" page.

    Writes the page title and a one-line description to the Streamlit app,
    then hands the survey DataFrame to ``perform_kmeans_clustering``, which
    renders the cluster tables.

    Args:
        df: Survey responses; passed through unchanged to the clustering
            step.
    """
    # NOTE(review): the whole render function is wrapped in st.cache_data,
    # so the call is memoized on ``df`` — confirm that caching the page
    # rendering (rather than only the clustering result) is intended.
    page_title = "Clustering Students to Build Personas"
    page_intro = "Clustering based on 36 fields of Likert data"

    st.title(page_title)
    st.write(page_intro)

    perform_kmeans_clustering(df)
12
+
13
+
14
def perform_kmeans_clustering(df, n_clusters=3, random_state=42):
    """Cluster respondents with k-means and render summary tables.

    Selects the Likert columns listed in ``likert_flat_fields``, drops rows
    with missing answers, fits a k-means model, and writes two tables to the
    Streamlit page: the number of students per cluster and the mean score of
    every question per cluster.

    If no fields are configured, or there are fewer complete rows than
    requested clusters, a warning is shown and nothing is clustered
    (``KMeans.fit`` would otherwise raise in both situations).

    Args:
        df: DataFrame containing every column named in
            ``likert_flat_fields``.
        n_clusters: Number of clusters to form. Defaults to 3.
        random_state: Seed passed to ``KMeans`` so that cluster labels are
            reproducible between runs. Defaults to 42.

    Raises:
        KeyError: If ``df`` lacks one of the configured columns (raised by
            the pandas column selection).
    """
    # Hard-coded list of Likert question columns used as clustering features.
    likert_flat_fields = [
        # your list of questions here...
    ]

    # Guard: with no feature columns the selection below yields a
    # zero-column frame, which KMeans cannot fit.
    if not likert_flat_fields:
        st.warning("No Likert fields are configured for clustering.")
        return

    # Keep only the feature columns and only fully answered rows; the
    # explicit copy guarantees the 'Cluster' column added below lands on an
    # independent frame and never on a view of the caller's data.
    likert_responses = df[likert_flat_fields].dropna().copy()

    # Guard: k-means needs at least as many samples as clusters.
    if len(likert_responses) < n_clusters:
        st.warning(
            "Not enough complete responses to form "
            f"{n_clusters} clusters."
        )
        return

    # n_init=10 restarts the algorithm from ten different centroid seeds.
    model = KMeans(
        n_clusters=n_clusters,
        n_init=10,
        random_state=random_state,
    ).fit(likert_responses)

    # Attach each respondent's cluster label.
    likert_responses['Cluster'] = model.labels_

    # Mean score of each question within each cluster.
    cluster_means = likert_responses.groupby('Cluster').mean().reset_index()

    # Number of respondents per cluster, ordered by cluster label.
    cluster_counts = (
        likert_responses['Cluster']
        .value_counts()
        .sort_index()
        .reset_index()
    )
    cluster_counts.columns = ['Cluster', 'Number of Students']

    # Render both summaries on the page.
    st.write("Number of Students in Each Cluster:")
    st.table(cluster_counts)

    st.write("Mean Scores for Each Question in Each Cluster:")
    st.table(cluster_means)