Spaces:
Runtime error
Runtime error
update
Browse files- app.py +94 -47
- data/meta.json +1 -1
app.py
CHANGED
@@ -1,13 +1,17 @@
|
|
1 |
-
import streamlit as st
|
2 |
-
import pandas as pd
|
3 |
-
import numpy as np
|
4 |
import json
|
|
|
5 |
import altair as alt
|
|
|
|
|
|
|
|
|
6 |
from utils import get_variable_filter, is_numerical
|
7 |
|
|
|
8 |
def check_codes(var, data_meta):
    """Return True when ``var`` has a non-empty code table in ``data_meta``.

    Args:
        var: Variable name used as a key into ``data_meta``.
        data_meta: Mapping of variable name to its metadata dict; the code
            table, when present, lives under the ``'codes'`` key.

    Returns:
        bool: False when the variable is unknown, has no ``'codes'`` entry,
        or its ``'codes'`` entry is ``None`` or empty; True otherwise.
    """
    # A variable without any metadata entry simply has no codes; previously
    # this raised KeyError, unlike id2name which tolerates unknown variables.
    entry = data_meta.get(var) or {}
    return bool(entry.get('codes'))
|
10 |
|
|
|
11 |
@st.cache(show_spinner=False)
|
12 |
def load_data():
|
13 |
with st.spinner('loading data...'):
|
@@ -21,20 +25,24 @@ def load_data():
|
|
21 |
name2id[k] = {vv: int(kk) for kk, vv in v['codes'].items()}
|
22 |
return data_main, data_meta, data_variable, data_filter, name2id
|
23 |
|
|
|
24 |
# Module-level datasets read as globals by the helper functions in this file.
# They are filled once from load_data(), which is wrapped in st.cache.
DATA_MAIN, DATA_META, DATA_VARIABLE, DATA_FILTER, NAME2ID = load_data()
|
25 |
|
|
|
26 |
def id2name(var, id_):
    """Translate a code id of ``var`` into its label from ``DATA_META``.

    Args:
        var: Variable name to look up in ``DATA_META``.
        id_: Code value; it is converted with ``str`` because the code table
            is keyed by strings.

    Returns:
        str: The label stored for ``id_``, or ``str(id_)`` when the variable
        is unknown, has no usable code table, or the id is not listed.
    """
    id_ = str(id_)
    # 'codes' may be missing, None or empty (check_codes tests for all
    # three); previously a None table crashed on ``.get``.
    codes = (DATA_META.get(var) or {}).get('codes')
    if not codes:
        return id_
    return codes.get(id_, id_)
|
31 |
|
|
|
32 |
@st.cache
def name2id(var, name):
    """Map a label of ``var`` back to its id using ``NAME2ID``.

    Variables without an entry in ``NAME2ID`` are passed through unchanged;
    for a known variable an unknown label raises ``KeyError``.
    """
    if var not in NAME2ID:
        return name
    return NAME2ID[var][name]
|
37 |
|
|
|
38 |
def main():
|
39 |
# st.write(df.head())
|
40 |
load_data()
|
@@ -47,19 +55,22 @@ def main():
|
|
47 |
'Heatmap': heat,
|
48 |
'Histogram': hist,
|
49 |
}
|
50 |
-
st.title('HRL Portal')
|
51 |
with st.sidebar:
|
52 |
-
|
|
|
53 |
# st.write(DATA_VARIABLE)
|
54 |
# st.write(DATA_FILTER)
|
55 |
charts[chart]()
|
56 |
|
|
|
57 |
@st.cache
def get_unique(var):
    """List the selectable values of ``var``.

    Coded variables yield the labels of their code table in ``DATA_META``;
    every other variable yields the distinct values found in its
    ``DATA_MAIN`` column.
    """
    if not check_codes(var, DATA_META):
        return DATA_MAIN[var].unique().tolist()
    labels = DATA_META[var]['codes'].values()
    return list(labels)
|
62 |
|
|
|
63 |
def get_var_name(var):
|
64 |
if var in DATA_META and 'name' in DATA_META[var]:
|
65 |
name = DATA_META[var]['name']
|
@@ -68,75 +79,105 @@ def get_var_name(var):
|
|
68 |
return f"{var} ({name})"
|
69 |
return var
|
70 |
|
|
|
71 |
def name2var(name):
    """Return the first whitespace-separated token of ``name``.

    This recovers the variable id from a display string such as
    ``"VAR (Label)"``. Raises ``IndexError`` when ``name`` has no tokens.
    """
    tokens = name.split(None, 1)
    return tokens[0]
|
73 |
|
|
|
74 |
def meta():
    """Render the 'Variables' page: one sub-header per variable in ``DATA_META``.

    The variable's description is written underneath when it has one.
    """
    st.header('Variables')
    for var, info in DATA_META.items():
        st.subheader(var)
        # Other helpers in this file treat metadata keys as optional, so a
        # variable without a description is shown with its heading only
        # instead of raising KeyError.
        description = info.get('description')
        if description:
            st.write(description)
|
79 |
|
|
|
80 |
def area():
|
81 |
with st.sidebar:
|
82 |
-
|
|
|
83 |
var = name2var(var_name)
|
84 |
-
st.
|
|
|
85 |
filters = {}
|
86 |
for fvar in DATA_FILTER[var]:
|
87 |
if fvar == 'YEAR':
|
88 |
continue
|
89 |
if is_numerical(fvar):
|
90 |
-
names = st.slider(fvar, min(get_unique(fvar)), max(get_unique(fvar)),
|
|
|
91 |
else:
|
92 |
names = st.multiselect(fvar, get_unique(fvar), default=get_unique(fvar))
|
93 |
filters[fvar] = set([name2id(fvar, n) for n in names])
|
94 |
-
st.header(f'Area Chart
|
95 |
-
with st.expander(f'{var}'):
|
96 |
-
|
97 |
-
|
98 |
-
|
99 |
-
|
100 |
-
|
101 |
-
|
102 |
-
|
103 |
-
|
104 |
-
|
105 |
-
|
106 |
-
|
107 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
108 |
# st.write(df)
|
109 |
-
|
110 |
-
if len(df)
|
111 |
-
|
112 |
-
|
113 |
-
|
114 |
-
|
115 |
-
|
116 |
-
|
117 |
-
|
118 |
-
|
119 |
-
|
120 |
-
|
121 |
-
|
122 |
-
|
123 |
-
|
124 |
-
|
125 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
126 |
|
127 |
@st.cache(show_spinner=False)
|
128 |
-
def get_area_data(var, filters
|
129 |
# for k, v in filters.items():
|
130 |
# st.write(k)
|
131 |
# st.write(v)
|
132 |
# st.write(DATA_MAIN[k].unique())
|
133 |
# st.write(len(DATA_MAIN))
|
134 |
-
|
135 |
-
|
136 |
-
|
137 |
-
|
138 |
-
|
139 |
-
|
|
|
|
|
|
|
140 |
# st.write(len(df))
|
141 |
# st.write(df.head())
|
142 |
with st.spinner('counting...'):
|
@@ -144,24 +185,30 @@ def get_area_data(var, filters, vals):
|
|
144 |
groups[var] = groups[var].apply(lambda x: id2name(var, x))
|
145 |
return groups
|
146 |
|
|
|
147 |
def line():
    """Placeholder page for the line graph; only its header is rendered."""
    st.header('Line Graph')


def scatter():
    """Placeholder page for the scatter plot; only its header is rendered."""
    st.header('Scatter Plot')


def bar():
    """Placeholder page for the bar chart; only its header is rendered."""
    st.header('Bar Chart')


def box():
    """Placeholder page for the box plot; only its header is rendered."""
    st.header('Box Plot')


def heat():
    """Placeholder page for the heatmap; only its header is rendered."""
    st.header('Heatmap')


def hist():
    """Placeholder page for the histogram; only its header is rendered."""
    st.header('Histogram')
|
164 |
|
|
|
165 |
if __name__ == '__main__':
|
166 |
main()
|
167 |
-
|
|
|
|
|
|
|
|
|
1 |
import json
|
2 |
+
import math
|
3 |
import altair as alt
|
4 |
+
import pandas as pd
|
5 |
+
import streamlit as st
|
6 |
+
st.set_page_config(layout="wide")
|
7 |
+
|
8 |
from utils import get_variable_filter, is_numerical
|
9 |
|
10 |
+
|
11 |
def check_codes(var, data_meta):
    """Return True when ``var`` has a non-empty code table in ``data_meta``.

    Args:
        var: Variable name used as a key into ``data_meta``.
        data_meta: Mapping of variable name to its metadata dict; the code
            table, when present, lives under the ``'codes'`` key.

    Returns:
        bool: False when the variable is unknown, has no ``'codes'`` entry,
        or its ``'codes'`` entry is ``None`` or empty; True otherwise.
    """
    # A variable without any metadata entry simply has no codes; previously
    # this raised KeyError, unlike id2name which tolerates unknown variables.
    entry = data_meta.get(var) or {}
    return bool(entry.get('codes'))
|
13 |
|
14 |
+
|
15 |
@st.cache(show_spinner=False)
|
16 |
def load_data():
|
17 |
with st.spinner('loading data...'):
|
|
|
25 |
name2id[k] = {vv: int(kk) for kk, vv in v['codes'].items()}
|
26 |
return data_main, data_meta, data_variable, data_filter, name2id
|
27 |
|
28 |
+
|
29 |
# Module-level datasets read as globals by the helper functions in this file.
# They are filled once from load_data(), which is wrapped in st.cache.
DATA_MAIN, DATA_META, DATA_VARIABLE, DATA_FILTER, NAME2ID = load_data()
|
30 |
|
31 |
+
|
32 |
def id2name(var, id_):
    """Translate a code id of ``var`` into its label from ``DATA_META``.

    Args:
        var: Variable name to look up in ``DATA_META``.
        id_: Code value; it is converted with ``str`` because the code table
            is keyed by strings.

    Returns:
        str: The label stored for ``id_``, or ``str(id_)`` when the variable
        is unknown, has no usable code table, or the id is not listed.
    """
    id_ = str(id_)
    # 'codes' may be missing, None or empty (check_codes tests for all
    # three); previously a None table crashed on ``.get``.
    codes = (DATA_META.get(var) or {}).get('codes')
    if not codes:
        return id_
    return codes.get(id_, id_)
|
37 |
|
38 |
+
|
39 |
@st.cache
def name2id(var, name):
    """Map a label of ``var`` back to its id using ``NAME2ID``.

    Variables without an entry in ``NAME2ID`` are passed through unchanged;
    for a known variable an unknown label raises ``KeyError``.
    """
    if var not in NAME2ID:
        return name
    return NAME2ID[var][name]
|
44 |
|
45 |
+
|
46 |
def main():
|
47 |
# st.write(df.head())
|
48 |
load_data()
|
|
|
55 |
'Heatmap': heat,
|
56 |
'Histogram': hist,
|
57 |
}
|
58 |
+
# st.title('HRL Portal')
|
59 |
with st.sidebar:
|
60 |
+
st.subheader('Chart Type')
|
61 |
+
chart = st.selectbox('Select a chart type:', list(charts.keys()), label_visibility='collapsed')
|
62 |
# st.write(DATA_VARIABLE)
|
63 |
# st.write(DATA_FILTER)
|
64 |
charts[chart]()
|
65 |
|
66 |
+
|
67 |
@st.cache
def get_unique(var):
    """List the selectable values of ``var``.

    Coded variables yield the labels of their code table in ``DATA_META``;
    every other variable yields the distinct values found in its
    ``DATA_MAIN`` column.
    """
    if not check_codes(var, DATA_META):
        return DATA_MAIN[var].unique().tolist()
    labels = DATA_META[var]['codes'].values()
    return list(labels)
|
72 |
|
73 |
+
|
74 |
def get_var_name(var):
|
75 |
if var in DATA_META and 'name' in DATA_META[var]:
|
76 |
name = DATA_META[var]['name']
|
|
|
79 |
return f"{var} ({name})"
|
80 |
return var
|
81 |
|
82 |
+
|
83 |
def name2var(name):
    """Return the first whitespace-separated token of ``name``.

    This recovers the variable id from a display string such as
    ``"VAR (Label)"``. Raises ``IndexError`` when ``name`` has no tokens.
    """
    tokens = name.split(None, 1)
    return tokens[0]
|
85 |
|
86 |
+
|
87 |
def meta():
    """Render the 'Variables' page: one sub-header per variable in ``DATA_META``.

    The variable's description is written underneath when it has one.
    """
    st.header('Variables')
    for var, info in DATA_META.items():
        st.subheader(var)
        # Other helpers in this file treat metadata keys as optional, so a
        # variable without a description is shown with its heading only
        # instead of raising KeyError.
        description = info.get('description')
        if description:
            st.write(description)
|
92 |
|
93 |
+
|
94 |
def _area_filter_ids(var):
    """Render one widget per filter variable of ``var`` and collect the chosen ids.

    Call this inside the ``st.sidebar`` context so the widgets land there.

    Returns:
        dict: ``{filter_variable: set_of_ids}`` in the form get_area_data
        expects for its ``isin`` filtering.
    """
    filters = {}
    for fvar in DATA_FILTER[var]:
        if fvar == 'YEAR':
            # YEAR is the x axis of the chart, so it is not offered as a filter.
            continue
        options = get_unique(fvar)
        if is_numerical(fvar):
            low, high = min(options), max(options)
            picked_low, picked_high = st.slider(fvar, low, high, value=(low, high))
            # A range slider returns only its two end points. get_area_data
            # filters with ``isin``, so expand the range to every observed
            # value inside it; otherwise only rows equal to an end point
            # would be kept.
            chosen = [v for v in options if picked_low <= v <= picked_high]
        else:
            chosen = st.multiselect(fvar, options, default=options)
        filters[fvar] = {name2id(fvar, n) for n in chosen}
    return filters


def area():
    """Render the stacked area chart page.

    The sidebar selects the variable and its filters. The main column offers
    a multiselect of the variable's values (with "Select all" / "Clear all"
    buttons) and plots the per-YEAR counts returned by get_area_data for the
    selected values. A warning is shown instead when nothing is left to plot.
    """
    with st.sidebar:
        st.subheader('Variable')
        var_name = st.selectbox('Select a variable:',
                                [get_var_name(v) for v in DATA_VARIABLE['area']],
                                label_visibility='collapsed')
        var = name2var(var_name)
        st.subheader('Filters')
        filters = _area_filter_ids(var)

    st.header(f'Area Chart - {var_name}')

    # The container is created first so the multiselect is drawn above the
    # two buttons even though the buttons must be read before it is built.
    container = st.container()
    select_all = st.button("Select all")
    clear_all = st.button("Clear all")

    options = get_unique(var)
    if select_all:
        codes = (DATA_META.get(var) or {}).get('codes') or {}
        # Code-table keys are strings (they come from JSON), so the sentinel
        # has to be compared as '-1'; comparing with the int -1 never matched.
        # NOTE(review): '-1' is presumably a "not applicable" code - confirm.
        if codes:
            default = [label for code, label in codes.items() if code != '-1']
        else:
            default = list(options)
    elif clear_all:
        default = []
    elif var == 'OCC1950':
        default = ['Authors', 'Musicians and music teachers', 'Telephone operators',
                   'Bus drivers', 'Cashiers']
    else:
        default = ['Drugs and medicines', 'Fisheries', 'Glass and glass products']

    # Streamlit raises when a default is not among the options, which would
    # happen for any variable the presets above were not written for.
    available = set(options)
    default = [label for label in default if label in available]
    names = container.multiselect(f"Select one or more {var} values to display:",
                                  options, default)

    df = get_area_data(var, filters)
    df = df[df[var].isin(names)]
    if df.empty:
        st.warning('Select some data to display')
        return

    # Clicking a legend entry highlights that series and fades the others.
    selection = alt.selection_multi(fields=[var], bind='legend')
    plot = (
        alt.Chart(df, title=f'Count of Different {var} Values')
        .mark_area()
        .encode(
            alt.X('YEAR'),
            alt.Y('count', title='count', stack='zero'),
            alt.Color(var, scale=alt.Scale(scheme='category20'), legend=alt.Legend()),
            opacity=alt.condition(selection, alt.value(1), alt.value(0.3)),
            tooltip=['YEAR', var, 'count'],
        )
        .properties(height=400 + len(names))
        .add_selection(selection)
    )
    st.altair_chart(plot, use_container_width=True)
|
163 |
+
|
164 |
|
165 |
@st.cache(show_spinner=False)
|
166 |
+
def get_area_data(var, filters=None):
|
167 |
# for k, v in filters.items():
|
168 |
# st.write(k)
|
169 |
# st.write(v)
|
170 |
# st.write(DATA_MAIN[k].unique())
|
171 |
# st.write(len(DATA_MAIN))
|
172 |
+
if filters is not None:
|
173 |
+
df = DATA_MAIN[['YEAR', var, *filters.keys()]]
|
174 |
+
# df = df[df[var].isin(vals)]
|
175 |
+
with st.spinner('filtering...'):
|
176 |
+
for fvar, fvals in filters.items():
|
177 |
+
df = df[df[fvar].isin(fvals)]
|
178 |
+
df = df[['YEAR', var]]
|
179 |
+
else:
|
180 |
+
df = DATA_MAIN[['YEAR', var]]
|
181 |
# st.write(len(df))
|
182 |
# st.write(df.head())
|
183 |
with st.spinner('counting...'):
|
|
|
185 |
groups[var] = groups[var].apply(lambda x: id2name(var, x))
|
186 |
return groups
|
187 |
|
188 |
+
|
189 |
def line():
    """Placeholder page for the line graph; only its header is rendered."""
    st.header('Line Graph')


def scatter():
    """Placeholder page for the scatter plot; only its header is rendered."""
    st.header('Scatter Plot')


def bar():
    """Placeholder page for the bar chart; only its header is rendered."""
    st.header('Bar Chart')


def box():
    """Placeholder page for the box plot; only its header is rendered."""
    st.header('Box Plot')


def heat():
    """Placeholder page for the heatmap; only its header is rendered."""
    st.header('Heatmap')


def hist():
    """Placeholder page for the histogram; only its header is rendered."""
    st.header('Histogram')
|
211 |
|
212 |
+
|
213 |
if __name__ == '__main__':
|
214 |
main()
|
|
data/meta.json
CHANGED
@@ -647,7 +647,7 @@
|
|
647 |
},
|
648 |
"OCC1950": {
|
649 |
"name": "Occupation",
|
650 |
-
"description": "OCC1950 applies the 1950 Census Bureau occupational classification system to occupational data, to enhance comparability across years. For pre-1940 samples created at Minnesota, the alphabetic responses supplied by enumerators were directly coded into the 1950 classification. For other samples, the information in the variable OCC was recoded into the 1950 classification.
|
651 |
"codes": {
|
652 |
"0": "Accountants and auditors",
|
653 |
"1": "Actors and actresses",
|
|
|
647 |
},
|
648 |
"OCC1950": {
|
649 |
"name": "Occupation",
|
650 |
+
"description": "OCC1950 applies the 1950 Census Bureau occupational classification system to occupational data, to enhance comparability across years. For pre-1940 samples created at Minnesota, the alphabetic responses supplied by enumerators were directly coded into the 1950 classification. For other samples, the information in the variable OCC was recoded into the 1950 classification.",
|
651 |
"codes": {
|
652 |
"0": "Accountants and auditors",
|
653 |
"1": "Actors and actresses",
|