krishaamer committed on
Commit
541b321
·
1 Parent(s): 3c1c755

Visualize shopping data

Browse files
Files changed (1) hide show
  1. page_shopping.py +71 -69
page_shopping.py CHANGED
@@ -4,7 +4,6 @@ import matplotlib.pyplot as plt
4
  import seaborn as sns
5
  import pandas as pd
6
  import numpy as np
7
- import networkx as nx
8
  from fields.likert_flat_fields import likert_flat_fields
9
  #from fields.boolean_fields import boolean_fields
10
 
@@ -15,11 +14,19 @@ def show(df):
15
  st.title("Shopping")
16
  st.markdown(
17
  f"<h2 style='text-align: center;'>Boycott Count (Overall)</h2>", unsafe_allow_html=True)
18
- show_boycott_count(df, font_prop=chinese_font)
19
- #generate_correlation_chart(df, chinese_font)
20
- create_correlation_network(df, 0.4, chinese_font)
 
 
 
 
 
 
 
21
 
22
- def show_boycott_count(df, font_prop):
 
23
  # Count the number of people who have boycotted a company and who have not
24
  boycott_count = df["你/妳有沒有抵制過某公司?"].value_counts().reset_index()
25
  boycott_count.columns = ['Boycott', 'Count']
@@ -28,92 +35,87 @@ def show_boycott_count(df, font_prop):
28
  plt.figure(figsize=(10, 6))
29
  barplot = sns.barplot(x='Boycott', y='Count', data=boycott_count, palette='viridis')
30
  ax = plt.gca() # Get the current Axes instance on the current figure matching the given keyword args, or create one.
31
- ax.set_xticklabels(ax.get_xticklabels(), fontproperties=font_prop)
32
 
33
  # Add labels and title
34
- plt.xlabel('Have you ever boycotted a company?', fontsize=12, fontproperties=font_prop)
35
- plt.ylabel('Count', fontsize=12, fontproperties=font_prop)
36
- plt.title("Number of People Who Have/Haven't Boycotted a Company", fontsize=16, fontproperties=font_prop)
37
 
38
  # Display values on the bars
39
  for index, value in enumerate(boycott_count['Count']):
40
- plt.text(index, value, str(value), ha='center', va='bottom', fontproperties=font_prop)
41
 
42
  # Display the chart in Streamlit
43
  st.pyplot(plt)
44
 
45
def create_correlation_network(df, threshold, chinese_font):
    """Draw a network linking Likert fields whose correlation is strong.

    Each pair of columns from ``likert_flat_fields`` whose absolute
    correlation exceeds ``threshold`` becomes an edge. Only fields that take
    part in at least one such pair appear as nodes, because nodes are added
    implicitly through their edges.

    Args:
        df: Survey responses containing every column in ``likert_flat_fields``.
        threshold: Minimum absolute correlation for a pair to be drawn.
        chinese_font: Font properties used for node labels and the title.
    """
    # Non-numeric answers become NaN so they are ignored by corr().
    numeric_df = df[likert_flat_fields].apply(pd.to_numeric, errors='coerce')
    corr_matrix = numeric_df.corr()
    columns = corr_matrix.columns

    # Walk the lower triangle only: the matrix is symmetric and the diagonal
    # (self-correlation) is not interesting.
    graph = nx.Graph()
    for i, field in enumerate(columns):
        for j in range(i):
            weight = corr_matrix.iloc[i, j]
            if abs(weight) > threshold:  # only consider strong correlations
                graph.add_edge(field, columns[j], weight=weight)

    pos = nx.spring_layout(graph, k=0.1, iterations=20)
    edges = graph.edges()
    # Line width encodes the strength of the relationship, so use the
    # magnitude: a signed weight would give negative widths for strong
    # negative correlations.
    widths = [abs(graph[u][v]['weight']) for u, v in edges]

    # Draw on an explicit figure so it can be handed to Streamlit and then
    # released, instead of relying on pyplot's global state.
    fig = plt.figure(figsize=(10, 10))
    nx.draw_networkx_nodes(graph, pos, node_size=500, node_color='lightblue', edgecolors='black')
    nx.draw_networkx_edges(graph, pos, edgelist=edges, width=widths, alpha=0.5, edge_color='gray')

    # Labels are drawn by hand so the Chinese font can be applied.
    for label in graph.nodes():
        x, y = pos[label]
        plt.text(x, y, label, fontsize=9, fontproperties=chinese_font, ha='center', va='center')

    plt.title('Correlation Network', fontproperties=chinese_font)
    plt.axis('off')  # Turn off the axis

    # Render in Streamlit, then close the figure so repeated reruns do not
    # accumulate open figures.
    st.pyplot(fig)
    plt.close(fig)
81
 
82
def generate_correlation_chart(df, chinese_font):
    """Show a heatmap of the fields that correlate strongly with another field.

    The Likert fields plus an encoded yes/no investment field are correlated
    against each other. A field is kept only if its absolute correlation with
    at least one *other* field exceeds 0.5.

    Args:
        df: Survey responses containing ``likert_flat_fields`` and the
            boolean investment column. It is not modified.
        chinese_font: Font properties used for tick labels and the title.
    """
    boolean_fields = [
        '你/妳覺得目前有任何投資嗎?'
    ]

    # Build the numeric data in a separate frame so the caller's DataFrame
    # does not gain "_encoded" columns as a side effect. The Likert answers
    # are coerced to numbers the same way create_correlation_network does.
    numeric_df = df[likert_flat_fields].apply(pd.to_numeric, errors='coerce')
    for field in boolean_fields:
        numeric_df[f"{field}_encoded"] = df[field].map({'有': 1, '沒有': 0})

    correlation_data = numeric_df.corr()

    # Define a threshold for strong correlations
    threshold = 0.5

    # Hide the diagonal before looking for strong correlations: every field
    # correlates perfectly with itself, which would otherwise make every
    # field pass the threshold.
    diagonal = np.eye(len(correlation_data), dtype=bool)
    off_diagonal = correlation_data.abs().where(~diagonal)
    is_strong = (off_diagonal.max() > threshold).to_numpy()
    strong_fields = correlation_data.columns[is_strong]

    # seaborn cannot draw an empty matrix, so report that instead.
    if strong_fields.empty:
        st.info("No fields exceed the correlation threshold.")
        return

    filtered_correlation_data = correlation_data.loc[strong_fields, strong_fields]

    # Draw on an explicit figure so it can be passed to Streamlit and closed.
    fig, ax = plt.subplots(figsize=(10, 8))
    sns.heatmap(filtered_correlation_data, annot=True, fmt=".2f", cmap="coolwarm", ax=ax)

    # Set the labels and title with the Chinese font
    ax.set_xticklabels(ax.get_xticklabels(), fontproperties=chinese_font, rotation=45, ha='right')
    ax.set_yticklabels(ax.get_yticklabels(), fontproperties=chinese_font, rotation=0)
    ax.set_title("強相關分析", fontproperties=chinese_font)

    # Show the plot in Streamlit, then release the figure.
    st.pyplot(fig)
    plt.close(fig)
 
 
 
 
 
 
 
 
 
 
 
4
  import seaborn as sns
5
  import pandas as pd
6
  import numpy as np
 
7
  from fields.likert_flat_fields import likert_flat_fields
8
  #from fields.boolean_fields import boolean_fields
9
 
 
14
  st.title("Shopping")
15
  st.markdown(
16
  f"<h2 style='text-align: center;'>Boycott Count (Overall)</h2>", unsafe_allow_html=True)
17
+ show_boycott_count(df, chinese_font)
18
+ st.markdown(
19
+ f"<h2 style='text-align: center;'>Why Boycott</h2>", unsafe_allow_html=True)
20
+ summarize_why_boycott(df, chinese_font)
21
+ st.markdown(
22
+ f"<h2 style='text-align: center;'>Trusted Brands</h2>", unsafe_allow_html=True)
23
+ summarize_trusted_brands(df, chinese_font)
24
+ st.markdown(
25
+ f"<h2 style='text-align: center;'>Choice Experiments</h2>", unsafe_allow_html=True)
26
+ visualize_shopping_data(df, chinese_font)
27
 
28
+
29
+ def show_boycott_count(df, chinese_font):
30
  # Count the number of people who have boycotted a company and who have not
31
  boycott_count = df["你/妳有沒有抵制過某公司?"].value_counts().reset_index()
32
  boycott_count.columns = ['Boycott', 'Count']
 
35
  plt.figure(figsize=(10, 6))
36
  barplot = sns.barplot(x='Boycott', y='Count', data=boycott_count, palette='viridis')
37
  ax = plt.gca() # Get the current Axes instance on the current figure matching the given keyword args, or create one.
38
+ ax.set_xticklabels(ax.get_xticklabels(), fontproperties=chinese_font)
39
 
40
  # Add labels and title
41
+ plt.xlabel('Have you ever boycotted a company?', fontsize=12, fontproperties=chinese_font)
42
+ plt.ylabel('Count', fontsize=12, fontproperties=chinese_font)
43
+ plt.title("Number of People Who Have/Haven't Boycotted a Company", fontsize=16, fontproperties=chinese_font)
44
 
45
  # Display values on the bars
46
  for index, value in enumerate(boycott_count['Count']):
47
+ plt.text(index, value, str(value), ha='center', va='bottom', fontproperties=chinese_font)
48
 
49
  # Display the chart in Streamlit
50
  st.pyplot(plt)
51
 
52
def summarize_why_boycott(df, chinese_font):
    """Show a table of how often each boycott reason was given.

    Args:
        df: Survey responses; the "為什麼抵制?" column is read.
        chinese_font: Not used by this function.
    """
    # Tally the answers, then order them from most to least frequent.
    reason_counts = df["為什麼抵制?"].value_counts().sort_values(ascending=False)

    st.write("Summary of Why Boycott:")
    st.table(reason_counts)
59
 
 
 
 
 
 
60
 
61
def _merge_responses(counts, labels, merged_label):
    """Collapse the entries of *counts* whose index is in *labels* into one row."""
    # A membership mask tolerates labels that never occur in the data and
    # counts each matching row exactly once, even if *labels* has duplicates.
    matches = counts.index.isin(labels)
    merged = counts[~matches].copy()
    merged.loc[merged_label] = counts[matches].sum()
    return merged


def summarize_trusted_brands(df, chinese_font):
    """Show a table of trusted-brand answers with equivalent answers merged.

    Free-text answers meaning "no" are combined into 'No trusted brand', and
    answers meaning "yes" without naming a brand are combined into
    'Have but not specified'. The result is displayed, sorted by count in
    descending order; nothing is returned.

    Args:
        df: Survey responses; the "你/妳有信任的品牌嗎?" column is read.
        chinese_font: Not used by this function.
    """
    # Get the count of responses in the "你/妳有信任的品牌嗎?" field
    trusted_brands = df["你/妳有信任的品牌嗎?"].value_counts()

    # List of responses to combine as 'no trusted brand'
    no_brand_responses = ["無", "沒有", "沒有特別", "🈚️", "目前沒有", "No", "沒", "沒有特別關注", "沒有特別信任的", "不知道", "無特別選擇", "目前沒有完全信任的", "沒有特定的", "沒有特定", "沒有特別研究", "目前沒有特別關注的品牌","N", "none", "無特別", "目前無", "沒有特別想到", "沒有固定的", "x", "沒在買", "nope", "一時想不到…", "沒有特別注意", "無特別的品牌", "無絕對信任的品牌", "不確定你說的範圍", "還沒有"]

    # Responses that affirm trust without naming a brand.
    have_but_not_specified = ["有", "有", "Yes", "應該有"]

    # Any listed answer may be missing from a given dataset; _merge_responses
    # handles that without raising.
    combined = _merge_responses(trusted_brands, no_brand_responses, 'No trusted brand')
    combined = _merge_responses(combined, have_but_not_specified, 'Have but not specified')

    summary = combined.sort_values(ascending=False)

    # Display the sorted counts with the combined rows
    st.write("Summary of Trusted Brands:")
    st.table(summary)
 
 
 
 
 
 
 
 
90
 
91
+
92
def visualize_shopping_data(df, chinese_font):
    """Render one bar chart per choice-experiment question, stacked vertically.

    For each question the 20 most frequent answers are plotted with their
    counts.

    Args:
        df: Survey responses containing the three shopping question columns.
        chinese_font: Font properties used for titles, axis labels and tick
            labels.
    """
    # Shopping fields with their corresponding titles
    shopping_fields = {
        "你/妳會買哪一種番茄?": "Which Type of Tomatoes Would You Buy?",
        "你/妳買哪種牛奶?": "Which Type of Milk Would You Buy?",
        "你/妳會買哪種雞蛋?": "Which Type of Eggs Would You Buy?"
    }

    # squeeze=False always returns a 2-D array of Axes, so a single question
    # needs no special handling.
    fig, axes = plt.subplots(
        len(shopping_fields), 1,
        figsize=(10, 6 * len(shopping_fields)),
        squeeze=False,
    )

    for ax, (column_name, title) in zip(axes.ravel(), shopping_fields.items()):
        # Summarize the data
        data = df[column_name].value_counts().head(20)  # Adjust the number as needed

        # Plot the data
        data.plot(kind='bar', color='skyblue', ax=ax, fontsize=12)
        ax.set_title(title, fontproperties=chinese_font)
        ax.set_xlabel('Options', fontproperties=chinese_font)
        ax.set_ylabel('Count', fontproperties=chinese_font)

        # Set the properties for the x-tick labels
        ax.set_xticklabels(data.index, rotation=45, ha='right', fontproperties=chinese_font)

    # Lay out this figure explicitly rather than whichever one is current.
    fig.tight_layout()
    st.pyplot(fig)
    # Close the figure so repeated Streamlit reruns do not accumulate open
    # figures.
    plt.close(fig)