Spaces:
Sleeping
Sleeping
Commit
·
541b321
1
Parent(s):
3c1c755
Visualize shopping data
Browse files- page_shopping.py +71 -69
page_shopping.py
CHANGED
@@ -4,7 +4,6 @@ import matplotlib.pyplot as plt
|
|
4 |
import seaborn as sns
|
5 |
import pandas as pd
|
6 |
import numpy as np
|
7 |
-
import networkx as nx
|
8 |
from fields.likert_flat_fields import likert_flat_fields
|
9 |
#from fields.boolean_fields import boolean_fields
|
10 |
|
@@ -15,11 +14,19 @@ def show(df):
|
|
15 |
st.title("Shopping")
|
16 |
st.markdown(
|
17 |
f"<h2 style='text-align: center;'>Boycott Count (Overall)</h2>", unsafe_allow_html=True)
|
18 |
-
show_boycott_count(df,
|
19 |
-
|
20 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
21 |
|
22 |
-
|
|
|
23 |
# Count the number of people who have invested and who have not
|
24 |
boycott_count = df["你/妳有沒有抵制過某公司?"].value_counts().reset_index()
|
25 |
boycott_count.columns = ['Boycott', 'Count']
|
@@ -28,92 +35,87 @@ def show_boycott_count(df, font_prop):
|
|
28 |
plt.figure(figsize=(10, 6))
|
29 |
barplot = sns.barplot(x='Boycott', y='Count', data=boycott_count, palette='viridis')
|
30 |
ax = plt.gca() # Get the current Axes instance on the current figure matching the given keyword args, or create one.
|
31 |
-
ax.set_xticklabels(ax.get_xticklabels(), fontproperties=
|
32 |
|
33 |
# Add labels and title
|
34 |
-
plt.xlabel('Have you ever boycotted a company?', fontsize=12, fontproperties=
|
35 |
-
plt.ylabel('Count', fontsize=12, fontproperties=
|
36 |
-
plt.title("Number of People Who Have/Haven't Boycotted a Company", fontsize=16, fontproperties=
|
37 |
|
38 |
# Display values on the bars
|
39 |
for index, value in enumerate(boycott_count['Count']):
|
40 |
-
plt.text(index, value, str(value), ha='center', va='bottom', fontproperties=
|
41 |
|
42 |
# Display the chart in Streamlit
|
43 |
st.pyplot(plt)
|
44 |
|
45 |
-
def
|
46 |
-
|
47 |
-
filtered_df = df[likert_flat_fields]
|
48 |
-
filtered_df = filtered_df.apply(pd.to_numeric, errors='coerce')
|
49 |
|
50 |
-
|
51 |
-
|
52 |
|
53 |
-
|
54 |
-
|
55 |
|
56 |
-
# Iterate over the correlation matrix and add edges
|
57 |
-
for i in range(len(corr_matrix.columns)):
|
58 |
-
for j in range(i):
|
59 |
-
if abs(corr_matrix.iloc[i, j]) > threshold: # only consider strong correlations
|
60 |
-
graph.add_edge(corr_matrix.columns[i], corr_matrix.columns[j], weight=corr_matrix.iloc[i, j])
|
61 |
|
62 |
-
|
63 |
-
|
64 |
-
|
65 |
-
weights = [graph[u][v]['weight'] for u, v in edges] # Use the weights for edge width
|
66 |
|
67 |
-
|
68 |
-
|
69 |
-
nx.draw_networkx_edges(graph, pos, edgelist=edges, width=weights, alpha=0.5, edge_color='gray')
|
70 |
|
71 |
-
#
|
72 |
-
|
73 |
-
x, y = pos[label]
|
74 |
-
plt.text(x, y, label, fontsize=9, fontproperties=chinese_font, ha='center', va='center')
|
75 |
|
76 |
-
|
77 |
-
|
|
|
78 |
|
79 |
-
#
|
80 |
-
|
81 |
|
82 |
-
|
|
|
83 |
|
84 |
-
|
85 |
-
|
86 |
-
]
|
87 |
|
88 |
-
|
89 |
-
for field in boolean_fields:
|
90 |
-
df[field + '_encoded'] = df[field].map({'有': 1, '沒有': 0})
|
91 |
|
92 |
-
#
|
93 |
-
|
94 |
-
|
95 |
-
# Calculate the correlation matrix
|
96 |
-
correlation_data = df[all_fields].corr()
|
97 |
-
|
98 |
-
# Define a threshold for strong correlations
|
99 |
-
threshold = 0.5
|
100 |
-
|
101 |
-
# Find all fields that have at least one strong correlation
|
102 |
-
strong_fields = correlation_data.columns[np.abs(correlation_data).max() > threshold]
|
103 |
|
104 |
-
|
105 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
106 |
|
107 |
-
#
|
108 |
-
plt.
|
109 |
-
ax = sns.heatmap(filtered_correlation_data, annot=True, fmt=".2f", cmap="coolwarm")
|
110 |
|
111 |
-
#
|
112 |
-
|
113 |
-
|
114 |
|
115 |
-
|
116 |
-
|
117 |
-
|
118 |
-
|
119 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
4 |
import seaborn as sns
|
5 |
import pandas as pd
|
6 |
import numpy as np
|
|
|
7 |
from fields.likert_flat_fields import likert_flat_fields
|
8 |
#from fields.boolean_fields import boolean_fields
|
9 |
|
|
|
14 |
st.title("Shopping")
|
15 |
st.markdown(
|
16 |
f"<h2 style='text-align: center;'>Boycott Count (Overall)</h2>", unsafe_allow_html=True)
|
17 |
+
show_boycott_count(df, chinese_font)
|
18 |
+
st.markdown(
|
19 |
+
f"<h2 style='text-align: center;'>Why Boycott</h2>", unsafe_allow_html=True)
|
20 |
+
summarize_why_boycott(df, chinese_font)
|
21 |
+
st.markdown(
|
22 |
+
f"<h2 style='text-align: center;'>Trusted Brands</h2>", unsafe_allow_html=True)
|
23 |
+
summarize_trusted_brands(df, chinese_font)
|
24 |
+
st.markdown(
|
25 |
+
f"<h2 style='text-align: center;'>Choice Experiments</h2>", unsafe_allow_html=True)
|
26 |
+
visualize_shopping_data(df, chinese_font)
|
27 |
|
28 |
+
|
29 |
+
def show_boycott_count(df, chinese_font):
|
30 |
# Count the number of people who have invested and who have not
|
31 |
boycott_count = df["你/妳有沒有抵制過某公司?"].value_counts().reset_index()
|
32 |
boycott_count.columns = ['Boycott', 'Count']
|
|
|
35 |
plt.figure(figsize=(10, 6))
|
36 |
barplot = sns.barplot(x='Boycott', y='Count', data=boycott_count, palette='viridis')
|
37 |
ax = plt.gca() # Get the current Axes instance on the current figure matching the given keyword args, or create one.
|
38 |
+
ax.set_xticklabels(ax.get_xticklabels(), fontproperties=chinese_font)
|
39 |
|
40 |
# Add labels and title
|
41 |
+
plt.xlabel('Have you ever boycotted a company?', fontsize=12, fontproperties=chinese_font)
|
42 |
+
plt.ylabel('Count', fontsize=12, fontproperties=chinese_font)
|
43 |
+
plt.title("Number of People Who Have/Haven't Boycotted a Company", fontsize=16, fontproperties=chinese_font)
|
44 |
|
45 |
# Display values on the bars
|
46 |
for index, value in enumerate(boycott_count['Count']):
|
47 |
+
plt.text(index, value, str(value), ha='center', va='bottom', fontproperties=chinese_font)
|
48 |
|
49 |
# Display the chart in Streamlit
|
50 |
st.pyplot(plt)
|
51 |
|
52 |
+
def summarize_why_boycott(df, chinese_font):
    """Render a frequency table of the answers to "為什麼抵制?".

    Each distinct answer in that column of ``df`` is counted, the counts are
    ordered from most to least frequent, and the result is shown as a
    Streamlit table below a short caption.

    ``chinese_font`` is not used here; the parameter is accepted so this
    function can be called the same way as the other section renderers.
    """
    ranked_reasons = (
        df["為什麼抵制?"]
        .value_counts()
        .sort_values(ascending=False)
    )

    st.write("Summary of Why Boycott:")
    st.table(ranked_reasons)
|
59 |
|
|
|
|
|
|
|
|
|
|
|
60 |
|
61 |
+
def summarize_trusted_brands(df, chinese_font):
    """Show a table of trusted-brand answers with equivalent replies merged.

    Counts the answers in the "你/妳有信任的品牌嗎?" column of ``df``, folds the
    answers listed in ``no_brand_responses`` into one 'No trusted brand' row
    and the answers listed in ``have_but_not_specified`` into one
    'Have but not specified' row, then renders the counts, largest first,
    as a Streamlit table. Nothing is returned.

    Args:
        df: Survey responses; must contain the "你/妳有信任的品牌嗎?" column.
        chinese_font: Unused; accepted so this function can be called the
            same way as the other section renderers.
    """
    trusted_brands = df["你/妳有信任的品牌嗎?"].value_counts()

    # Answers that all mean "I have no trusted brand".
    no_brand_responses = [
        "無", "沒有", "沒有特別", "🈚️", "目前沒有", "No", "沒", "沒有特別關注",
        "沒有特別信任的", "不知道", "無特別選擇", "目前沒有完全信任的",
        "沒有特定的", "沒有特定", "沒有特別研究", "目前沒有特別關注的品牌",
        "N", "none", "無特別", "目前無", "沒有特別想到", "沒有固定的", "x",
        "沒在買", "nope", "一時想不到…", "沒有特別注意", "無特別的品牌",
        "無絕對信任的品牌", "不確定你說的範圍", "還沒有",
    ]

    # Answers that say "yes" without naming a brand.
    have_but_not_specified = ["有", "有", "Yes", "應該有"]

    combined = _merge_answer_counts(
        trusted_brands, no_brand_responses, 'No trusted brand')
    combined = _merge_answer_counts(
        combined, have_but_not_specified, 'Have but not specified')

    summary = combined.sort_values(ascending=False)

    # Render the sorted counts, including the two merged rows.
    st.write("Summary of Trusted Brands:")
    st.table(summary)


def _merge_answer_counts(counts, labels, merged_label):
    """Collapse the rows of ``counts`` named in ``labels`` into one row.

    A membership mask is used instead of ``counts[labels]`` / ``drop(labels)``
    because label-list selection raises ``KeyError`` when a listed answer
    does not occur in the data, and returns a row once per repeat when a
    label is listed twice (which would inflate the sum).
    """
    matched = counts.index.isin(labels)
    merged = counts[~matched].copy()
    merged.loc[merged_label] = counts[matched].sum()
    return merged
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
90 |
|
91 |
+
|
92 |
+
def visualize_shopping_data(df, chinese_font):
    """Draw one bar chart per choice-experiment question and show them.

    For each question column in ``shopping_fields`` the (at most) 20 most
    frequent answers are counted and plotted as a bar chart; the charts are
    stacked vertically in a single figure that is rendered with Streamlit.

    Args:
        df: Survey responses; must contain every column named in
            ``shopping_fields``.
        chinese_font: Font properties applied to titles, axis labels and
            tick labels (presumably a CJK-capable font — confirm at the
            call site).
    """
    # Question column -> English chart title.
    shopping_fields = {
        "你/妳會買哪一種番茄?": "Which Type of Tomatoes Would You Buy?",
        "你/妳買哪種牛奶?": "Which Type of Milk Would You Buy?",
        "你/妳會買哪種雞蛋?": "Which Type of Eggs Would You Buy?",
    }

    # squeeze=False always yields a 2-D array of Axes, so a single-question
    # layout needs no special casing.
    fig, axes = plt.subplots(
        len(shopping_fields), 1,
        figsize=(10, 6 * len(shopping_fields)),
        squeeze=False,
    )

    try:
        for ax, (column_name, title) in zip(axes[:, 0], shopping_fields.items()):
            # Keep only the most common answers so the axis stays readable.
            data = df[column_name].value_counts().head(20)

            data.plot(kind='bar', color='skyblue', ax=ax, fontsize=12)
            ax.set_title(title, fontproperties=chinese_font)
            ax.set_xlabel('Options', fontproperties=chinese_font)
            ax.set_ylabel('Count', fontproperties=chinese_font)

            # Answers are long text: tilt them and apply the supplied font.
            ax.set_xticklabels(data.index, rotation=45, ha='right',
                               fontproperties=chinese_font)

        # Lay out this figure explicitly rather than whichever is "current".
        fig.tight_layout()
        st.pyplot(fig)
    finally:
        # pyplot keeps every figure alive until it is closed; without this a
        # new figure would accumulate on each Streamlit rerun.
        plt.close(fig)
|