Spaces:
Sleeping
Sleeping
Commit
·
be58b51
1
Parent(s):
d5c2d7f
Add personas
Browse files- app.py +5 -0
- page_personas.py +47 -0
app.py
CHANGED
@@ -4,6 +4,7 @@ import page_attitudes
|
|
4 |
import page_demographics
|
5 |
import page_shopping
|
6 |
import page_investing
|
|
|
7 |
import page_tests
|
8 |
from urllib.parse import quote, unquote
|
9 |
from datasets import load_dataset
|
@@ -39,6 +40,8 @@ if st.sidebar.button("Shopping"):
|
|
39 |
st.session_state['page'] = 'Shopping'
|
40 |
if st.sidebar.button("Investing"):
|
41 |
st.session_state['page'] = 'Investing'
|
|
|
|
|
42 |
if st.sidebar.button("Tests"):
|
43 |
st.session_state['page'] = 'Tests'
|
44 |
|
@@ -58,6 +61,8 @@ elif st.session_state['page'] == 'Shopping':
|
|
58 |
page_shopping.show(df)
|
59 |
elif st.session_state['page'] == 'Investing':
|
60 |
page_investing.show(df)
|
|
|
|
|
61 |
elif st.session_state['page'] == 'Tests':
|
62 |
page_tests.show(df)
|
63 |
|
|
|
4 |
import page_demographics
|
5 |
import page_shopping
|
6 |
import page_investing
|
7 |
+
import page_personas
|
8 |
import page_tests
|
9 |
from urllib.parse import quote, unquote
|
10 |
from datasets import load_dataset
|
|
|
40 |
st.session_state['page'] = 'Shopping'
|
41 |
if st.sidebar.button("Investing"):
|
42 |
st.session_state['page'] = 'Investing'
|
43 |
+
if st.sidebar.button("Personas"):
|
44 |
+
st.session_state['page'] = 'Personas'
|
45 |
if st.sidebar.button("Tests"):
|
46 |
st.session_state['page'] = 'Tests'
|
47 |
|
|
|
61 |
page_shopping.show(df)
|
62 |
elif st.session_state['page'] == 'Investing':
|
63 |
page_investing.show(df)
|
64 |
+
elif st.session_state['page'] == 'Personas':
|
65 |
+
page_personas.show(df)
|
66 |
elif st.session_state['page'] == 'Tests':
|
67 |
page_tests.show(df)
|
68 |
|
page_personas.py
ADDED
@@ -0,0 +1,47 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from sklearn.cluster import KMeans
|
2 |
+
from scipy.stats import chisquare
|
3 |
+
import streamlit as st
|
4 |
+
import pandas as pd
|
5 |
+
|
6 |
+
|
7 |
+
@st.cache_data
def show(df):
    """Render the Personas page.

    Writes the page title and a one-line description, then hands ``df`` to
    ``perform_kmeans_clustering``, which displays the cluster tables.

    Args:
        df: DataFrame forwarded unchanged to the clustering step.
    """
    # NOTE(review): st.cache_data wraps the whole page renderer, which
    # returns None. Confirm that caching the rendered output (rather than
    # only the clustering computation) is what is intended.
    st.title("Clustering Students to Build Personas")
    st.write("Clustering based on 36 fields of Likert data")

    # All computation and table rendering happens in the helper.
    perform_kmeans_clustering(df)
|
12 |
+
|
13 |
+
|
14 |
+
def perform_kmeans_clustering(df, n_clusters=3):
    """Cluster respondents on their Likert answers and render summary tables.

    Selects the configured Likert columns from ``df``, discards rows with any
    missing answer, fits k-means, and writes two Streamlit tables: the number
    of students in each cluster and the per-cluster mean of every question.

    When clustering is not possible (no fields configured, configured columns
    absent from ``df``, or fewer complete rows than clusters) a Streamlit
    message is shown instead of letting pandas / scikit-learn raise.

    Args:
        df: DataFrame that must contain every column named in
            ``likert_flat_fields``.
        n_clusters: Number of clusters to fit. Defaults to 3, the value that
            was previously hard-coded.

    Returns:
        None. All output is rendered through Streamlit.
    """
    # Likert question columns to cluster on. This is still a placeholder:
    # fill in the column names before using the page.
    likert_flat_fields = [
        # your list of questions here...
    ]

    # With no columns selected KMeans would be handed a zero-feature frame
    # and raise, so stop early with an explanatory message.
    if not likert_flat_fields:
        st.warning("No Likert fields are configured for clustering.")
        return

    # Selecting absent columns raises KeyError; report them by name instead.
    missing_fields = [
        field for field in likert_flat_fields if field not in df.columns
    ]
    if missing_fields:
        st.error(
            "Missing Likert columns: " + ", ".join(map(str, missing_fields))
        )
        return

    # Keep only the clustering columns and drop rows with missing answers
    # for a more accurate clustering.
    complete_answers = df[likert_flat_fields].dropna()

    # KMeans requires at least as many samples as clusters.
    if len(complete_answers) < n_clusters:
        st.warning(
            f"Not enough complete responses ({len(complete_answers)}) "
            f"to form {n_clusters} clusters."
        )
        return

    # random_state is fixed so the cluster assignment is reproducible
    # between reruns.
    kmeans = KMeans(
        n_clusters=n_clusters,
        n_init=10,
        random_state=42,
    ).fit(complete_answers)

    # assign() builds a new frame with the labels instead of mutating the
    # filtered selection in place.
    clustered = complete_answers.assign(Cluster=kmeans.labels_)

    # Mean score for each question in each cluster.
    cluster_means = clustered.groupby('Cluster').mean().reset_index()

    # Number of students in each cluster, ordered by cluster label.
    cluster_counts = (
        clustered['Cluster'].value_counts().sort_index().reset_index()
    )
    cluster_counts.columns = ['Cluster', 'Number of Students']

    # Display the tables in Streamlit.
    st.write("Number of Students in Each Cluster:")
    st.table(cluster_counts)

    st.write("Mean Scores for Each Question in Each Cluster:")
    st.table(cluster_means)
|