Spaces:
Sleeping
Sleeping
Commit
·
ba2cd93
1
Parent(s):
6e83e2f
Clarify several pages
Browse files- fields/investing_flat_fields.py +7 -0
- page_attitudes.py +1 -1
- page_home.py +9 -8
- page_personas.py +67 -44
- page_shopping.py +12 -11
- page_tests.py +24 -16
fields/investing_flat_fields.py
ADDED
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Survey question headers (Traditional Chinese) for the investing-related
# fields, one string per question.
# NOTE(review): presumably these must match the dataset's column names
# character for character -- confirm before editing any of them.
investing_flat_fields = [
    "你/妳會對金錢感到焦慮嗎?",
    "你/妳會對金錢很節儉嗎?",
    "你/妳會經常存錢嗎?",
    "你/妳對自己的財務知識滿意嗎?",
    "你/妳投資會考慮環保嗎?",
]
|
page_attitudes.py
CHANGED
@@ -10,7 +10,7 @@ from fields.translation_mapping import translation_mapping
|
|
10 |
@st.cache_data
|
11 |
def show(df):
|
12 |
st.title("Student Attitudes (Overall)")
|
13 |
-
st.write("Student
|
14 |
|
15 |
# Chinese font
|
16 |
chinese_font = FontProperties(fname='mingliu.ttf')
|
|
|
10 |
@st.cache_data
|
11 |
def show(df):
|
12 |
st.title("Student Attitudes (Overall)")
|
13 |
+
st.write("Student attitudes across all likert fields without clustering")
|
14 |
|
15 |
# Chinese font
|
16 |
chinese_font = FontProperties(fname='mingliu.ttf')
|
page_home.py
CHANGED
@@ -3,15 +3,16 @@ import streamlit as st
|
|
3 |
|
4 |
def show():
|
5 |
st.title("Survey Overview")
|
6 |
-
st.markdown('''A survey of Taiwanese college students (excludes overseas Chinese-speaking students as well as foreign students).
|
7 |
-
\n* Survey Oct.
|
8 |
\n* 2000 cards with a QR code printed out
|
9 |
-
\n* Distribution conducted
|
10 |
-
\n*
|
11 |
-
\n*
|
12 |
\n* Data after filtering: 675 people aged 18-26 (Gen-Z), Taiwanese, current students in BA (large majority), MA (small minority) or PhD level (very few respondents)
|
13 |
-
\n* 36
|
14 |
-
\n* 14 product features (multiple-choice)
|
15 |
\n* 6 choice experiments
|
16 |
-
\n*
|
|
|
17 |
)
|
|
|
3 |
|
4 |
def show():
    """Render the survey overview page: a title plus a markdown summary."""
    # The literal deliberately mixes real line breaks with explicit "\n"
    # escapes so that every bullet starts at column 0 of its own line in the
    # text handed to the markdown renderer. It is kept exactly as written.
    overview_markdown = '''A survey of Taiwanese college students (excludes overseas Chinese-speaking students as well as foreign students) covering attitudes towards shopping, saving, investing, economy, nature, sustainability, and AI.
\n* Survey Oct. 13th - Nov. 3rd, 2023
\n* 2000 cards with a QR code printed out
\n* Distribution conducted at 8 universities (handing out the cards)
\n* 1289 people started the survey, 518 quit
\n* 771 people completed the whole survey
\n* Data after filtering: 675 people aged 18-26 (Gen-Z), Taiwanese, current students in BA (large majority), MA (small minority) or PhD level (very few respondents)
\n* 36 likert fields (5-point scale) used for clustering the students into 3 personas with K-means clustering
\n* 14 product features (multiple-choice) used for K-modes clustering
\n* 6 choice experiments
\n* 2 option ranking questions
\n* 10 text fields used to enrich the personas'''

    st.title("Survey Overview")
    st.markdown(overview_markdown)
|
page_personas.py
CHANGED
@@ -1,6 +1,7 @@
|
|
1 |
import streamlit as st
|
2 |
import pandas as pd
|
3 |
import numpy as np
|
|
|
4 |
import matplotlib.pyplot as plt
|
5 |
import seaborn as sns
|
6 |
import squarify
|
@@ -11,36 +12,49 @@ from fields.likert_flat_fields import likert_flat_fields
|
|
11 |
|
12 |
#@st.cache_data
|
13 |
def show(df):
|
14 |
-
st.title("Clustering Students to Build Personas")
|
15 |
-
st.write("Clustering Students based on 36 fields of Likert data")
|
16 |
-
|
17 |
-
st.title("Top 10 highest agreement between personas")
|
18 |
-
|
19 |
-
create_treemap()
|
20 |
-
|
21 |
-
st.title("Top 10 highest disagreement between Personas")
|
22 |
-
|
23 |
# Chinese font
|
24 |
chinese_font = FontProperties(fname='mingliu.ttf')
|
25 |
-
|
26 |
-
show_clustering_heatmap(df, chinese_font)
|
27 |
-
|
28 |
# Prepare the data and perform clustering and PCA
|
29 |
df_clustered, pca, cluster_centers = prepare_data_for_pca(df)
|
30 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
31 |
# Show a scatterplot with all clusters included
|
32 |
-
|
33 |
-
|
|
|
34 |
|
35 |
# Show a scatterplot for each cluster separately
|
36 |
for cluster_id in range(3):
|
37 |
df_cluster = df_clustered[df_clustered['Cluster'] == cluster_id]
|
38 |
-
plot_scatterplot(df_cluster, pca, cluster_centers, chinese_font,
|
39 |
-
|
|
|
|
|
|
|
|
|
40 |
|
|
|
|
|
41 |
|
|
|
42 |
|
43 |
-
def
|
44 |
|
45 |
# Select only the relevant columns for clustering
|
46 |
df_likert_real_data = df[likert_flat_fields]
|
@@ -59,20 +73,12 @@ def perform_kmeans_clustering(df):
|
|
59 |
cluster_means_real_data = df_likert_real_data.groupby(
|
60 |
'Cluster').mean().reset_index()
|
61 |
|
62 |
-
# Count the number of students in each cluster
|
63 |
-
cluster_counts = df_likert_real_data['Cluster'].value_counts(
|
64 |
-
).sort_index().reset_index()
|
65 |
-
cluster_counts.columns = ['Cluster', 'Number of Students']
|
66 |
-
|
67 |
# Display the table
|
68 |
-
st.
|
69 |
-
st.write("Mean Scores for Each Question in Each Cluster:")
|
70 |
st.table(cluster_means_real_data)
|
71 |
|
72 |
|
73 |
def show_clustering_heatmap(df, chinese_font):
|
74 |
-
st.title("Heatmap")
|
75 |
-
|
76 |
# Filter the DataFrame to only include the Likert scale fields
|
77 |
df_likert_data = df[likert_flat_fields]
|
78 |
|
@@ -100,9 +106,9 @@ def show_clustering_heatmap(df, chinese_font):
|
|
100 |
ax.set_ylabel('Cluster ID', fontproperties=chinese_font)
|
101 |
|
102 |
# Rotate the x-axis labels for better readability
|
103 |
-
|
|
|
104 |
|
105 |
-
# Use the figure object (fig) in st.pyplot() to display the plot
|
106 |
st.pyplot(fig)
|
107 |
|
108 |
|
@@ -125,32 +131,49 @@ def prepare_data_for_pca(df):
|
|
125 |
return df_clustered, pca, cluster_centers
|
126 |
|
127 |
|
128 |
-
def plot_scatterplot(df, pca, cluster_centers, chinese_font, title):
|
129 |
# Create a figure and a set of subplots
|
130 |
fig, ax = plt.subplots(figsize=(10, 10))
|
131 |
|
132 |
-
#
|
133 |
-
|
134 |
-
data=df, palette='viridis', s=100, alpha=0.6, ax=ax)
|
135 |
|
136 |
-
|
137 |
-
|
138 |
-
|
139 |
-
|
140 |
-
|
141 |
-
|
142 |
-
|
143 |
-
|
|
|
|
|
|
|
|
|
144 |
alpha=0.75, marker='o', edgecolors='k')
|
|
|
|
|
|
|
145 |
|
|
|
146 |
ax.set_title(title, fontproperties=chinese_font)
|
147 |
-
ax.set_xlabel('Principal Component 1')
|
148 |
-
ax.set_ylabel('Principal Component 2')
|
149 |
|
150 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
151 |
st.pyplot(fig)
|
152 |
|
153 |
|
|
|
|
|
154 |
def create_treemap():
|
155 |
categories = {
|
156 |
'Ethical Consumption and Labor Concerns': 3.2,
|
@@ -181,7 +204,7 @@ def create_treemap():
|
|
181 |
ax.axis('off')
|
182 |
|
183 |
# Add a title to the plot
|
184 |
-
plt.title('
|
185 |
|
186 |
# Use the figure object (fig) in st.pyplot() to display the plot
|
187 |
st.pyplot(fig)
|
|
|
1 |
import streamlit as st
|
2 |
import pandas as pd
|
3 |
import numpy as np
|
4 |
+
import textwrap
|
5 |
import matplotlib.pyplot as plt
|
6 |
import seaborn as sns
|
7 |
import squarify
|
|
|
12 |
|
13 |
#@st.cache_data
|
14 |
def _centered_heading(text):
    """Render ``text`` as a centred level-2 heading via raw HTML markdown."""
    st.markdown(
        f"<h2 style='text-align: center;'>{text}</h2>", unsafe_allow_html=True)


def show(df):
    """Render the Personas page.

    Runs the clustering/PCA preparation on ``df`` and then renders, in order:
    the combined scatterplot, one scatterplot per cluster, the table of mean
    answer scores, the clustering heatmap and the agreement treemap.

    Args:
        df: Survey responses. Passed unchanged to ``prepare_data_for_pca``,
            ``get_kmeans_table`` and ``show_clustering_heatmap``.
    """
    # Chinese font
    chinese_font = FontProperties(fname='mingliu.ttf')

    # Prepare the data and perform clustering and PCA
    df_clustered, pca, cluster_centers = prepare_data_for_pca(df)

    # Titles
    st.title("Personas")
    st.write("Based on 36 fields of likert data")

    # Retain colors: build the cluster -> colour mapping once and hand the
    # same mapping to every plot so a cluster keeps its colour throughout.
    unique_clusters = df_clustered['Cluster'].unique()
    palette = sns.color_palette('pastel', n_colors=len(unique_clusters))
    cluster_palette = dict(zip(unique_clusters, palette))

    # Cluster names
    # NOTE(review): the names are tied to numeric cluster IDs; this presumably
    # relies on the clustering assigning the same IDs on every run -- confirm
    # in prepare_data_for_pca.
    cluster_names = {
        0: 'Sustainable',
        1: 'Moderate',
        2: 'Frugal',
    }

    # Show a scatterplot with all clusters included
    _centered_heading("Clustering Students to Build 3 Personas")
    plot_scatterplot(
        df_clustered, pca, cluster_centers, chinese_font,
        cluster_palette, cluster_names,
        title="Distinct Respondent Profiles Based on K-means Clustering")

    # Show a scatterplot for each cluster separately. Iterate over the cluster
    # IDs that are actually present instead of a hard-coded range(3), so no
    # empty plot is drawn and no cluster is silently skipped.
    for cluster_id in sorted(unique_clusters):
        df_cluster = df_clustered[df_clustered['Cluster'] == cluster_id]
        plot_scatterplot(
            df_cluster, pca, cluster_centers, chinese_font,
            cluster_palette, cluster_names,
            title=f"Scatterplot for Cluster {cluster_id}")

    _centered_heading("Mean Answer Scores")
    get_kmeans_table(df)
    show_clustering_heatmap(df, chinese_font)

    _centered_heading("Agreement between personas")
    create_treemap()
|
56 |
|
57 |
+
def get_kmeans_table(df):
|
58 |
|
59 |
# Select only the relevant columns for clustering
|
60 |
df_likert_real_data = df[likert_flat_fields]
|
|
|
73 |
cluster_means_real_data = df_likert_real_data.groupby(
|
74 |
'Cluster').mean().reset_index()
|
75 |
|
|
|
|
|
|
|
|
|
|
|
76 |
# Display the table
|
77 |
+
st.write("Mean response values for each likert question in each cluster:")
|
|
|
78 |
st.table(cluster_means_real_data)
|
79 |
|
80 |
|
81 |
def show_clustering_heatmap(df, chinese_font):
|
|
|
|
|
82 |
# Filter the DataFrame to only include the Likert scale fields
|
83 |
df_likert_data = df[likert_flat_fields]
|
84 |
|
|
|
106 |
ax.set_ylabel('Cluster ID', fontproperties=chinese_font)
|
107 |
|
108 |
# Rotate the x-axis labels for better readability
|
109 |
+
wrapped_labels = [textwrap.fill(label.get_text(), width=10) for label in ax.get_xticklabels()]
|
110 |
+
ax.set_xticklabels(wrapped_labels, rotation=45, fontproperties=chinese_font)
|
111 |
|
|
|
112 |
st.pyplot(fig)
|
113 |
|
114 |
|
|
|
131 |
return df_clustered, pca, cluster_centers
|
132 |
|
133 |
|
134 |
+
def plot_scatterplot(df, pca, cluster_centers, chinese_font, cluster_palette, cluster_names, title):
    """Draw a scatterplot of clustered respondents and display it in Streamlit.

    Each point is a row of ``df`` coloured by its cluster. For every cluster
    present in ``df`` the cluster centre is drawn as a larger marker annotated
    with the number of rows in that cluster, and the legend shows the cluster
    ID, its persona name and its row count.

    Args:
        df: Rows to plot; must provide the columns 'Component_1',
            'Component_2' and 'Cluster'.
        pca: Not used by this function; kept so existing callers keep working.
        cluster_centers: Indexable by cluster label; each entry provides the
            centre's x at index 0 and y at index 1.
        chinese_font: Font properties applied to the title, axis labels and
            centre annotations.
        cluster_palette: Mapping from cluster label to colour.
        cluster_names: Mapping from cluster label to persona name.
        title: Title of the plot.

    Raises:
        KeyError: If a cluster present in ``df`` is missing from
            ``cluster_palette`` or ``cluster_names``.
    """
    # Create a figure and a set of subplots
    fig, ax = plt.subplots(figsize=(10, 10))

    try:
        # Calculate cluster counts
        cluster_counts = df['Cluster'].value_counts()

        # Plot the scatterplot
        scatter = sns.scatterplot(x='Component_1', y='Component_2', hue='Cluster',
                                  data=df, palette=cluster_palette, s=100, alpha=0.6, ax=ax)

        # Get unique cluster labels sorted by value
        unique_clusters = sorted(df['Cluster'].unique())

        # Mark the centre of every cluster that appears in this plot
        for label in unique_clusters:
            center = cluster_centers[label]
            ax.scatter(center[0], center[1], c=cluster_palette[label], s=200,
                       alpha=0.75, marker='o', edgecolors='k')
            # Annotate the number of respondents in the cluster
            ax.text(center[0], center[1], str(cluster_counts[label]), color='black',
                    ha='center', va='center', fontproperties=chinese_font)

        # Set titles and labels
        ax.set_title(title, fontproperties=chinese_font)
        ax.set_xlabel('Principal Component 1', fontproperties=chinese_font)
        ax.set_ylabel('Principal Component 2', fontproperties=chinese_font)

        # Extract handles and labels from the scatterplot
        handles, labels = scatter.get_legend_handles_labels()

        # Match each legend handle to its cluster through the label text the
        # plot produced, so an extra or reordered legend entry cannot shift a
        # persona name onto the wrong colour.
        cluster_by_text = {str(cluster): cluster for cluster in unique_clusters}
        matched = [(handle, cluster_by_text[text])
                   for handle, text in zip(handles, labels)
                   if text in cluster_by_text]
        if len(matched) == len(unique_clusters):
            legend_handles = [handle for handle, _ in matched]
            legend_clusters = [cluster for _, cluster in matched]
        else:
            # Label text did not line up with the cluster IDs; fall back to
            # pairing handles and sorted clusters by position.
            legend_handles = handles
            legend_clusters = unique_clusters

        # Update labels with custom names and counts
        new_labels = [
            f'Cluster {label}: {cluster_names[label]} (n={cluster_counts[label]})'
            for label in legend_clusters
        ]
        ax.legend(handles=legend_handles, labels=new_labels,
                  title='Personas', loc='upper right')

        # Use the figure object (fig) to display the plot
        st.pyplot(fig)
    finally:
        # Figures created through pyplot stay registered until closed; close
        # this one so reruns of the page do not accumulate open figures.
        plt.close(fig)
|
173 |
|
174 |
|
175 |
+
|
176 |
+
|
177 |
def create_treemap():
|
178 |
categories = {
|
179 |
'Ethical Consumption and Labor Concerns': 3.2,
|
|
|
204 |
ax.axis('off')
|
205 |
|
206 |
# Add a title to the plot
|
207 |
+
plt.title('Average Agreement Level by Question Category', fontsize=15)
|
208 |
|
209 |
# Use the figure object (fig) in st.pyplot() to display the plot
|
210 |
st.pyplot(fig)
|
page_shopping.py
CHANGED
@@ -13,14 +13,15 @@ def show(df):
|
|
13 |
# Load the Chinese font
|
14 |
chinese_font = FontProperties(fname='mingliu.ttf', size=12)
|
15 |
st.title("Shopping")
|
16 |
-
st.write("Clustering
|
17 |
-
st.title("Boycott Count")
|
18 |
-
show_boycott_count(df, font_prop=chinese_font)
|
19 |
clusters = perform_kmodes_clustering(df, prod_feat_flat_fields)
|
20 |
-
st.
|
|
|
21 |
show_radar_chart(clusters, font_prop=chinese_font)
|
22 |
-
st.title("Feature Preferences")
|
23 |
plot_feature_preferences(clusters, font_prop=chinese_font)
|
|
|
|
|
|
|
24 |
|
25 |
def show_boycott_count(df, font_prop):
|
26 |
# Count the number of people who have invested and who have not
|
@@ -75,9 +76,9 @@ def perform_kmodes_clustering(df, feature_columns, n_clusters=3):
|
|
75 |
def show_radar_chart(clusters, font_prop):
|
76 |
|
77 |
df_dict={
|
78 |
-
'
|
79 |
-
'
|
80 |
-
'
|
81 |
}
|
82 |
|
83 |
feature_translations_dict = dict(zip(prod_feat_flat_fields, feature_translations))
|
@@ -146,9 +147,9 @@ def plot_feature_preferences(clusters, font_prop):
|
|
146 |
"老實說我對任何環保資訊都沒有太多興趣\nHonestly, I'm Not Very Interested in Any Eco Information",
|
147 |
"投資前比較公司的環保表現\nCompare Companies' Environmental Performance Before Investing"
|
148 |
],
|
149 |
-
'
|
150 |
-
'
|
151 |
-
'
|
152 |
}
|
153 |
# Create a DataFrame
|
154 |
df = pd.DataFrame(data)
|
|
|
13 |
# Load the Chinese font
|
14 |
chinese_font = FontProperties(fname='mingliu.ttf', size=12)
|
15 |
st.title("Shopping")
|
16 |
+
st.write("Clustering students based on AI-assistant feature choices")
|
|
|
|
|
17 |
clusters = perform_kmodes_clustering(df, prod_feat_flat_fields)
|
18 |
+
st.markdown(
|
19 |
+
f"<h2 style='text-align: center;'>Feature Preferences</h2>", unsafe_allow_html=True)
|
20 |
show_radar_chart(clusters, font_prop=chinese_font)
|
|
|
21 |
plot_feature_preferences(clusters, font_prop=chinese_font)
|
22 |
+
st.markdown(
|
23 |
+
f"<h2 style='text-align: center;'>Boycott Count</h2>", unsafe_allow_html=True)
|
24 |
+
show_boycott_count(df, font_prop=chinese_font)
|
25 |
|
26 |
def show_boycott_count(df, font_prop):
|
27 |
# Count the number of people who have invested and who have not
|
|
|
76 |
def show_radar_chart(clusters, font_prop):
|
77 |
|
78 |
df_dict={
|
79 |
+
'Conscious (n=340)': clusters[0],
|
80 |
+
'Interested (n=215)': clusters[1],
|
81 |
+
'Advocate (n=126)': clusters[2]
|
82 |
}
|
83 |
|
84 |
feature_translations_dict = dict(zip(prod_feat_flat_fields, feature_translations))
|
|
|
147 |
"老實說我對任何環保資訊都沒有太多興趣\nHonestly, I'm Not Very Interested in Any Eco Information",
|
148 |
"投資前比較公司的環保表現\nCompare Companies' Environmental Performance Before Investing"
|
149 |
],
|
150 |
+
'Conscious (n=340)': [0.367, 0.415, 0.191, 0.176, 0.079, 1.000, 0.197, 0.265, 0.144, 0.241, 0.144, 0.332, 0.044, 0.188],
|
151 |
+
'Interested (n=215)': [0.260, 0.163, 0.153, 0.191, 0.107, 0.000, 0.135, 0.219, 0.172, 0.186, 0.093, 0.214, 0.233, 0.130],
|
152 |
+
'Advocate (n=126)': [0.825, 0.881, 0.460, 0.746, 0.230, 0.881, 0.667, 0.690, 0.421, 0.865, 0.468, 0.778, 0.143, 0.738]
|
153 |
}
|
154 |
# Create a DataFrame
|
155 |
df = pd.DataFrame(data)
|
page_tests.py
CHANGED
@@ -1,11 +1,13 @@
|
|
1 |
from scipy.stats import chisquare
|
|
|
2 |
import streamlit as st
|
3 |
import pandas as pd
|
4 |
from fields.likert_flat_fields import likert_flat_fields
|
5 |
|
6 |
-
|
7 |
def show(df):
|
8 |
st.title("Statistical Tests")
|
|
|
9 |
show_chi_square_results(df)
|
10 |
|
11 |
def show_chi_square_results(df):
|
@@ -13,40 +15,46 @@ def show_chi_square_results(df):
|
|
13 |
|
14 |
for field in likert_flat_fields:
|
15 |
observed_values = df[field].value_counts().sort_index()
|
16 |
-
observed_values = observed_values.
|
17 |
expected_values = [len(df) / len(observed_values)] * len(observed_values)
|
18 |
-
expected_values = [float(x) for x in expected_values]
|
19 |
chi_stat, p_value = chisquare(f_obs=observed_values, f_exp=expected_values)
|
20 |
chi_square_results[field] = {'Chi-Square Statistic': chi_stat, 'p-value': p_value}
|
21 |
|
22 |
chi_square_df = pd.DataFrame.from_dict(chi_square_results, orient='index')
|
23 |
chi_square_df['p-value'] = chi_square_df['p-value'].astype(float)
|
24 |
|
|
|
|
|
|
|
25 |
# Reset index to add a sequence number
|
26 |
chi_square_df.reset_index(inplace=True)
|
27 |
chi_square_df.rename(columns={'index': 'Question'}, inplace=True)
|
28 |
|
29 |
# Define thresholds for highlighting
|
30 |
chi_square_threshold = 300 # example threshold for high Chi-Square value
|
31 |
-
p_value_threshold = 1e-50
|
32 |
|
33 |
# Apply the highlighting
|
34 |
-
def highlight(value):
|
35 |
-
if
|
36 |
-
return "background-color: yellow"
|
37 |
-
elif isinstance(value, float) and value < p_value_threshold:
|
38 |
return "background-color: yellow"
|
|
|
|
|
39 |
else:
|
40 |
-
return
|
41 |
-
|
42 |
-
# Apply the highlighting to numeric columns only
|
43 |
-
chi_square_df_styled = chi_square_df.style.applymap(highlight, subset=pd.IndexSlice[:, ['Chi-Square Statistic', 'p-value']])
|
44 |
|
45 |
-
#
|
46 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
47 |
|
48 |
-
# Convert styled DataFrame to HTML
|
49 |
chi_square_html = chi_square_df_styled.to_html(escape=False)
|
50 |
|
51 |
# Display the HTML with unsafe_allow_html set to True
|
52 |
-
st.markdown(chi_square_html, unsafe_allow_html=True)
|
|
|
1 |
from scipy.stats import chisquare
|
2 |
+
from functools import partial
|
3 |
import streamlit as st
|
4 |
import pandas as pd
|
5 |
from fields.likert_flat_fields import likert_flat_fields
|
6 |
|
7 |
+
#@st.cache_data
|
8 |
def show(df):
    """Render the statistical tests page: title, colour legend, then the chi-square results for ``df``."""
    colour_legend = "Yellow Chi-Square statistics (high) and pink p-values (low) are statistically meaningful"

    st.title("Statistical Tests")
    st.write(colour_legend)
    show_chi_square_results(df)
|
12 |
|
13 |
def show_chi_square_results(df):
|
|
|
15 |
|
16 |
for field in likert_flat_fields:
|
17 |
observed_values = df[field].value_counts().sort_index()
|
18 |
+
observed_values = observed_values.reindex(index=range(1, 6), fill_value=0)
|
19 |
expected_values = [len(df) / len(observed_values)] * len(observed_values)
|
|
|
20 |
chi_stat, p_value = chisquare(f_obs=observed_values, f_exp=expected_values)
|
21 |
chi_square_results[field] = {'Chi-Square Statistic': chi_stat, 'p-value': p_value}
|
22 |
|
23 |
chi_square_df = pd.DataFrame.from_dict(chi_square_results, orient='index')
|
24 |
chi_square_df['p-value'] = chi_square_df['p-value'].astype(float)
|
25 |
|
26 |
+
# Convert p-values to string for formatting
|
27 |
+
chi_square_df['p-value'] = chi_square_df['p-value'].apply(lambda x: "{:.2e}".format(x))
|
28 |
+
|
29 |
# Reset index to add a sequence number
|
30 |
chi_square_df.reset_index(inplace=True)
|
31 |
chi_square_df.rename(columns={'index': 'Question'}, inplace=True)
|
32 |
|
33 |
# Define thresholds for highlighting
|
34 |
chi_square_threshold = 300 # example threshold for high Chi-Square value
|
35 |
+
p_value_threshold = 1e-50 # example threshold for very low p-value
|
36 |
|
37 |
# Apply the highlighting
|
38 |
+
def highlight(value, chi_square_threshold, p_value_threshold, col_name):
|
39 |
+
if col_name == 'Chi-Square Statistic' and float(value) > chi_square_threshold:
|
|
|
|
|
40 |
return "background-color: yellow"
|
41 |
+
elif col_name == 'p-value' and float(value) < p_value_threshold:
|
42 |
+
return "background-color: pink"
|
43 |
else:
|
44 |
+
return None
|
|
|
|
|
|
|
45 |
|
46 |
+
# Create partial functions for each column to apply the highlight with the column name
|
47 |
+
highlight_chi_square = partial(highlight, chi_square_threshold=chi_square_threshold,
|
48 |
+
p_value_threshold=p_value_threshold, col_name='Chi-Square Statistic')
|
49 |
+
highlight_p_value = partial(highlight, chi_square_threshold=chi_square_threshold,
|
50 |
+
p_value_threshold=p_value_threshold, col_name='p-value')
|
51 |
+
|
52 |
+
# Apply the highlighting to the DataFrame
|
53 |
+
chi_square_df_styled = chi_square_df.style.applymap(highlight_chi_square, subset=['Chi-Square Statistic']) \
|
54 |
+
.applymap(highlight_p_value, subset=['p-value'])
|
55 |
|
56 |
+
# Convert styled DataFrame to HTML
|
57 |
chi_square_html = chi_square_df_styled.to_html(escape=False)
|
58 |
|
59 |
# Display the HTML with unsafe_allow_html set to True
|
60 |
+
st.markdown(chi_square_html, unsafe_allow_html=True)
|