cboettig committed on
Commit
2a55e4a
·
1 Parent(s): a329c1d
Files changed (2) hide show
  1. app.py +48 -78
  2. requirements.txt +1 -0
app.py CHANGED
@@ -17,42 +17,38 @@
17
 
18
  import os
19
 
 
20
  import altair as alt
21
  import numpy as np
22
  import pandas as pd
23
  import pydeck as pdk
24
  import streamlit as st
25
 
 
 
 
 
26
  # SETTING PAGE CONFIG TO WIDE MODE AND ADDING A TITLE AND FAVICON
27
- st.set_page_config(layout="wide", page_title="NYC Ridesharing Demo", page_icon=":taxi:")
 
28
 
29
 
30
  # LOAD DATA ONCE
31
  @st.cache_resource
32
  def load_data():
33
- path = "uber-raw-data-sep14.csv.gz"
34
- if not os.path.isfile(path):
35
- path = f"https://github.com/streamlit/demo-uber-nyc-pickups/raw/main/{path}"
36
-
37
- data = pd.read_csv(
38
- path,
39
- nrows=100000, # approx. 10% of data
40
- names=[
41
- "date/time",
42
- "lat",
43
- "lon",
44
- ], # specify names directly since they don't change
45
- skiprows=1, # don't read header since names specified directly
46
- usecols=[0, 1, 2], # doesn't load last column, constant value "B02512"
47
- parse_dates=[
48
- "date/time"
49
- ], # set as datetime instead of converting after the fact
50
- )
51
-
52
  return data
53
 
54
 
55
- # FUNCTION FOR AIRPORT MAPS
56
  def map(data, lat, lon, zoom):
57
  st.write(
58
  pdk.Deck(
@@ -67,10 +63,9 @@ def map(data, lat, lon, zoom):
67
  pdk.Layer(
68
  "HexagonLayer",
69
  data=data,
70
- get_position=["lon", "lat"],
71
- radius=100,
72
- elevation_scale=4,
73
- elevation_range=[0, 1000],
74
  pickable=True,
75
  extruded=True,
76
  ),
@@ -79,10 +74,10 @@ def map(data, lat, lon, zoom):
79
  )
80
 
81
 
82
- # FILTER DATA FOR A SPECIFIC HOUR, CACHE
83
  @st.cache_data
84
- def filterdata(df, hour_selected):
85
- return df[df["date/time"].dt.hour == hour_selected]
86
 
87
 
88
  # CALCULATE MIDPOINT FOR GIVEN SET OF DATA
@@ -91,17 +86,6 @@ def mpoint(lat, lon):
91
  return (np.average(lat), np.average(lon))
92
 
93
 
94
- # FILTER DATA BY HOUR
95
- @st.cache_data
96
- def histdata(df, hr):
97
- filtered = data[
98
- (df["date/time"].dt.hour >= hr) & (df["date/time"].dt.hour < (hr + 1))
99
- ]
100
-
101
- hist = np.histogram(filtered["date/time"].dt.minute, bins=60, range=(0, 60))[0]
102
-
103
- return pd.DataFrame({"minute": range(60), "pickups": hist})
104
-
105
 
106
  # STREAMLIT APP LAYOUT
107
  data = load_data()
@@ -114,8 +98,8 @@ row1_1, row1_2 = st.columns((2, 3))
114
  # E.G. https://share.streamlit.io/streamlit/demo-uber-nyc-pickups/main?pickup_hour=2
115
  if not st.session_state.get("url_synced", False):
116
  try:
117
- pickup_hour = int(st.query_params["pickup_hour"][0])
118
- st.session_state["pickup_hour"] = pickup_hour
119
  st.session_state["url_synced"] = True
120
  except KeyError:
121
  pass
@@ -123,14 +107,14 @@ if not st.session_state.get("url_synced", False):
123
 
124
  # IF THE SLIDER CHANGES, UPDATE THE QUERY PARAM
125
  def update_query_params():
126
- hour_selected = st.session_state["pickup_hour"]
127
- st.query_params["pickup_hour"]=hour_selected
128
 
129
 
130
  with row1_1:
131
  st.title("NYC Uber Ridesharing Data")
132
  hour_selected = st.slider(
133
- "Select hour of pickup", 0, 23, key="pickup_hour", on_change=update_query_params
134
  )
135
 
136
 
@@ -138,57 +122,43 @@ with row1_2:
138
  st.write(
139
  """
140
  ##
141
- Examining how Uber pickups vary over time in New York City's and at its major regional airports.
142
- By sliding the slider on the left you can view different slices of time and explore different transportation trends.
143
  """
144
  )
145
 
146
  # LAYING OUT THE MIDDLE SECTION OF THE APP WITH THE MAPS
147
  row2_1, row2_2, row2_3, row2_4 = st.columns((2, 1, 1, 1))
148
 
 
149
  # SETTING THE ZOOM LOCATIONS FOR THE AIRPORTS
150
- la_guardia = [40.7900, -73.8700]
151
- jfk = [40.6650, -73.7821]
152
- newark = [40.7090, -74.1805]
153
- zoom_level = 12
154
- midpoint = mpoint(data["lat"], data["lon"])
155
 
156
  with row2_1:
157
  st.write(
158
- f"""**All New York City from {hour_selected}:00 and {(hour_selected + 1) % 24}:00**"""
159
  )
160
- map(filterdata(data, hour_selected), midpoint[0], midpoint[1], 11)
161
 
162
  with row2_2:
163
- st.write("**La Guardia Airport**")
164
- map(filterdata(data, hour_selected), la_guardia[0], la_guardia[1], zoom_level)
165
 
166
  with row2_3:
167
- st.write("**JFK Airport**")
168
- map(filterdata(data, hour_selected), jfk[0], jfk[1], zoom_level)
169
 
170
  with row2_4:
171
- st.write("**Newark Airport**")
172
- map(filterdata(data, hour_selected), newark[0], newark[1], zoom_level)
 
 
 
 
 
173
 
174
- # CALCULATING DATA FOR THE HISTOGRAM
175
- chart_data = histdata(data, hour_selected)
176
 
177
- # LAYING OUT THE HISTOGRAM SECTION
178
- st.write(
179
- f"""**Breakdown of rides per minute between {hour_selected}:00 and {(hour_selected + 1) % 24}:00**"""
180
- )
181
 
182
- st.altair_chart(
183
- alt.Chart(chart_data)
184
- .mark_area(
185
- interpolate="step-after",
186
- )
187
- .encode(
188
- x=alt.X("minute:Q", scale=alt.Scale(nice=False)),
189
- y=alt.Y("pickups:Q"),
190
- tooltip=["minute", "pickups"],
191
- )
192
- .configure_mark(opacity=0.2, color="red"),
193
- use_container_width=True,
194
- )
 
17
 
18
  import os
19
 
20
+ # +
21
  import altair as alt
22
  import numpy as np
23
  import pandas as pd
24
  import pydeck as pdk
25
  import streamlit as st
26
 
27
+ import ibis
28
+ from ibis import _
29
+ # -
30
+
31
  # SETTING PAGE CONFIG TO WIDE MODE AND ADDING A TITLE AND FAVICON
32
+ st.set_page_config(layout="wide", page_title="GBIF Biodiversity Demo", page_icon=":butterfly:")
33
+
34
 
35
 
36
# LOAD DATA ONCE
@st.cache_resource
def load_data():
    """Return an ibis table of GB butterfly (Lepidoptera) occurrences.

    Fast path: read a pre-filtered local parquet snapshot when one exists.
    Slow path: stream the full GBIF occurrence parquet from S3 and narrow it
    to the three columns the app uses (year, longitude, latitude).
    """
    connection = ibis.duckdb.connect()

    # Prefer the locally cached, already-filtered extract if present.
    local_snapshot = "butterflies_gb.parquet"
    if os.path.isfile(local_snapshot):
        return connection.read_parquet(local_snapshot)

    # Otherwise pull the full GBIF dump and filter it down.
    occurrences = connection.read_parquet(
        "s3://gbif-open-data-us-east-1/occurrence/2023-06-01/occurrence.parquet"
    )
    data = (
        occurrences
        # drop rows missing any of the coordinates/year we plot
        .dropna(["decimallongitude", "decimallatitude", "year"], how="any")
        # butterflies/moths recorded in Great Britain only
        .filter([_.order == "Lepidoptera", _.countrycode == "GB"])
        .select(['year', 'decimallongitude', 'decimallatitude'])
    )
    return data
50
 
51
 
 
52
  def map(data, lat, lon, zoom):
53
  st.write(
54
  pdk.Deck(
 
63
  pdk.Layer(
64
  "HexagonLayer",
65
  data=data,
66
+ get_position=["decimallongitude", "decimallatitude"],
67
+ radius=1000,
68
+ elevation_scale=100,
 
69
  pickable=True,
70
  extruded=True,
71
  ),
 
74
  )
75
 
76
 
77
# FILTER DATA FOR A SPECIFIC YEAR, CACHE
@st.cache_data
def filterdata(df, year):
    """Return the rows of *df* observed in *year*, materialized as pandas.

    NOTE(review): st.cache_data hashes its arguments; confirm the ibis table
    passed as *df* is hashable (otherwise rename the parameter to `_df` so
    Streamlit skips hashing it).
    """
    matching = df.filter(_.year == year)
    return matching.to_pandas()
81
 
82
 
83
  # CALCULATE MIDPOINT FOR GIVEN SET OF DATA
 
86
  return (np.average(lat), np.average(lon))
87
 
88
 
 
 
 
 
 
 
 
 
 
 
 
89
 
90
# STREAMLIT APP LAYOUT
data = load_data()

# SEE IF THERE'S A ?year= QUERY PARAM IN THE URL (E.G. .../?year=2015) AND,
# ONCE PER SESSION, COPY IT INTO SESSION STATE SO THE SLIDER PICKS IT UP
if not st.session_state.get("url_synced", False):
    try:
        # st.query_params values are strings; parse the whole value rather
        # than indexing [0], which would grab only the first character.
        # (Bug fix: the original stored the undefined name `year`, raising
        # NameError whenever the param was present.)
        year_param = int(st.query_params["year"])
        st.session_state["year"] = year_param
        st.session_state["url_synced"] = True
    except (KeyError, ValueError):
        # Missing or non-numeric param: fall back to the slider's default.
        pass
 
107
 
108
# IF THE SLIDER CHANGES, UPDATE THE QUERY PARAM
def update_query_params():
    """Mirror the slider's session-state value into the ?year= URL param."""
    st.query_params["year"] = st.session_state["year"]
112
 
113
 
114
with row1_1:
    # Bug fix: the slider result was bound to `hour_selected` while all
    # downstream panels read the (previously undefined) name `year`.
    # Title/label also updated from the stale NYC Uber wording to match the
    # GBIF page_title set in st.set_page_config.
    st.title("GBIF Biodiversity Data")
    # Writes to session-state key "year"; update_query_params keeps the
    # ?year= URL param in sync when the slider moves.
    year = st.slider(
        "Select year", 2010, 2023, key="year", on_change=update_query_params
    )
    hour_selected = year  # legacy alias from the Uber demo, kept for compatibility
119
 
120
 
 
122
  st.write(
123
  """
124
  ##
125
+ By sliding the slider on the left you can view different slices of time and explore different trends.
 
126
  """
127
  )
128
 
129
# LAYING OUT THE MIDDLE SECTION OF THE APP WITH THE MAPS
row2_1, row2_2, row2_3, row2_4 = st.columns((2, 1, 1, 1))

# MAP CENTER: fixed (lat, lon) midpoint over Great Britain.
# NOTE(review): a computed midpoint via mpoint() would need the table's
# decimallatitude/decimallongitude columns (not "lat"/"lon") — confirm
# before re-enabling it. Leftover "# +"/"# -" editing markers removed.
midpoint = (52.0, -1.0)
139
 
140
# ONE LARGE MAP PLUS THREE SMALLER PANELS, ALL CENTERED ON THE UK MIDPOINT,
# EACH AT A PROGRESSIVELY WIDER ZOOM LEVEL.
center_lat, center_lon = midpoint
panels = [
    (row2_1, f"""**All UK {year}**""", 4),
    (row2_2, "**Panel 1**", 3),
    (row2_3, "**Panel 2**", 2),
    (row2_4, "**Panel 3**", 1),
]
for column, caption, zoom_level in panels:
    with column:
        st.write(caption)
        # filterdata is cached, so the repeated per-panel calls are cheap.
        map(filterdata(data, year), center_lat, center_lon, zoom_level)
157
+
158
+
159
+
160
+
161
+
162
 
 
 
163
 
 
 
 
 
164
 
 
 
 
 
 
 
 
 
 
 
 
 
 
requirements.txt CHANGED
@@ -3,3 +3,4 @@ numpy
3
  pandas
4
  pydeck
5
  streamlit
 
 
3
  pandas
4
  pydeck
5
  streamlit
6
+ ibis-framework[duckdb]