Spaces:
Sleeping
Sleeping
test
Browse files- app.py +48 -78
- requirements.txt +1 -0
app.py
CHANGED
@@ -17,42 +17,38 @@
|
|
17 |
|
18 |
import os
|
19 |
|
|
|
20 |
import altair as alt
|
21 |
import numpy as np
|
22 |
import pandas as pd
|
23 |
import pydeck as pdk
|
24 |
import streamlit as st
|
25 |
|
|
|
|
|
|
|
|
|
26 |
# SETTING PAGE CONFIG TO WIDE MODE AND ADDING A TITLE AND FAVICON
|
27 |
-
st.set_page_config(layout="wide", page_title="
|
|
|
28 |
|
29 |
|
30 |
# LOAD DATA ONCE
|
31 |
@st.cache_resource
|
32 |
def load_data():
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
"lon",
|
44 |
-
], # specify names directly since they don't change
|
45 |
-
skiprows=1, # don't read header since names specified directly
|
46 |
-
usecols=[0, 1, 2], # doesn't load last column, constant value "B02512"
|
47 |
-
parse_dates=[
|
48 |
-
"date/time"
|
49 |
-
], # set as datetime instead of converting after the fact
|
50 |
-
)
|
51 |
-
|
52 |
return data
|
53 |
|
54 |
|
55 |
-
# FUNCTION FOR AIRPORT MAPS
|
56 |
def map(data, lat, lon, zoom):
|
57 |
st.write(
|
58 |
pdk.Deck(
|
@@ -67,10 +63,9 @@ def map(data, lat, lon, zoom):
|
|
67 |
pdk.Layer(
|
68 |
"HexagonLayer",
|
69 |
data=data,
|
70 |
-
get_position=["
|
71 |
-
radius=
|
72 |
-
elevation_scale=
|
73 |
-
elevation_range=[0, 1000],
|
74 |
pickable=True,
|
75 |
extruded=True,
|
76 |
),
|
@@ -79,10 +74,10 @@ def map(data, lat, lon, zoom):
|
|
79 |
)
|
80 |
|
81 |
|
82 |
-
# FILTER DATA FOR A SPECIFIC
|
83 |
@st.cache_data
|
84 |
-
def filterdata(df,
|
85 |
-
return df
|
86 |
|
87 |
|
88 |
# CALCULATE MIDPOINT FOR GIVEN SET OF DATA
|
@@ -91,17 +86,6 @@ def mpoint(lat, lon):
|
|
91 |
return (np.average(lat), np.average(lon))
|
92 |
|
93 |
|
94 |
-
# FILTER DATA BY HOUR
|
95 |
-
@st.cache_data
def histdata(df, hr):
    """Count pickups per minute for the hour starting at ``hr``.

    Args:
        df: DataFrame with a datetime-typed ``"date/time"`` column.
        hr: Hour of day; rows whose hour satisfies ``hr <= hour < hr + 1``
            are kept.

    Returns:
        A DataFrame with columns ``"minute"`` (0-59) and ``"pickups"``
        (number of kept rows falling in that minute).
    """
    hours = df["date/time"].dt.hour
    # Index the frame that was passed in. The previous version indexed the
    # module-level ``data`` with a mask built from ``df``, which only worked
    # when the caller happened to pass that same global frame.
    filtered = df[(hours >= hr) & (hours < (hr + 1))]

    hist = np.histogram(filtered["date/time"].dt.minute, bins=60, range=(0, 60))[0]

    return pd.DataFrame({"minute": range(60), "pickups": hist})
|
104 |
-
|
105 |
|
106 |
# STREAMLIT APP LAYOUT
|
107 |
data = load_data()
|
@@ -114,8 +98,8 @@ row1_1, row1_2 = st.columns((2, 3))
|
|
114 |
# E.G. https://share.streamlit.io/streamlit/demo-uber-nyc-pickups/main?pickup_hour=2
|
115 |
if not st.session_state.get("url_synced", False):
|
116 |
try:
|
117 |
-
pickup_hour = int(st.query_params["
|
118 |
-
st.session_state["
|
119 |
st.session_state["url_synced"] = True
|
120 |
except KeyError:
|
121 |
pass
|
@@ -123,14 +107,14 @@ if not st.session_state.get("url_synced", False):
|
|
123 |
|
124 |
# IF THE SLIDER CHANGES, UPDATE THE QUERY PARAM
|
125 |
def update_query_params():
|
126 |
-
|
127 |
-
st.query_params["
|
128 |
|
129 |
|
130 |
with row1_1:
|
131 |
st.title("NYC Uber Ridesharing Data")
|
132 |
hour_selected = st.slider(
|
133 |
-
"Select hour of pickup",
|
134 |
)
|
135 |
|
136 |
|
@@ -138,57 +122,43 @@ with row1_2:
|
|
138 |
st.write(
|
139 |
"""
|
140 |
##
|
141 |
-
|
142 |
-
By sliding the slider on the left you can view different slices of time and explore different transportation trends.
|
143 |
"""
|
144 |
)
|
145 |
|
146 |
# LAYING OUT THE MIDDLE SECTION OF THE APP WITH THE MAPS
|
147 |
row2_1, row2_2, row2_3, row2_4 = st.columns((2, 1, 1, 1))
|
148 |
|
|
|
149 |
# SETTING THE ZOOM LOCATIONS FOR THE AIRPORTS
|
150 |
-
|
151 |
-
|
152 |
-
|
153 |
-
|
154 |
-
|
155 |
|
156 |
with row2_1:
|
157 |
st.write(
|
158 |
-
f"""**All
|
159 |
)
|
160 |
-
map(filterdata(data,
|
161 |
|
162 |
with row2_2:
|
163 |
-
st.write("**
|
164 |
-
map(filterdata(data,
|
165 |
|
166 |
with row2_3:
|
167 |
-
st.write("**
|
168 |
-
map(filterdata(data,
|
169 |
|
170 |
with row2_4:
|
171 |
-
st.write("**
|
172 |
-
map(filterdata(data,
|
|
|
|
|
|
|
|
|
|
|
173 |
|
174 |
-
# CALCULATING DATA FOR THE HISTOGRAM
|
175 |
-
chart_data = histdata(data, hour_selected)
|
176 |
|
177 |
-
# LAYING OUT THE HISTOGRAM SECTION
|
178 |
-
st.write(
|
179 |
-
f"""**Breakdown of rides per minute between {hour_selected}:00 and {(hour_selected + 1) % 24}:00**"""
|
180 |
-
)
|
181 |
|
182 |
-
st.altair_chart(
|
183 |
-
alt.Chart(chart_data)
|
184 |
-
.mark_area(
|
185 |
-
interpolate="step-after",
|
186 |
-
)
|
187 |
-
.encode(
|
188 |
-
x=alt.X("minute:Q", scale=alt.Scale(nice=False)),
|
189 |
-
y=alt.Y("pickups:Q"),
|
190 |
-
tooltip=["minute", "pickups"],
|
191 |
-
)
|
192 |
-
.configure_mark(opacity=0.2, color="red"),
|
193 |
-
use_container_width=True,
|
194 |
-
)
|
|
|
17 |
|
18 |
import os
|
19 |
|
20 |
+
# +
|
21 |
import altair as alt
|
22 |
import numpy as np
|
23 |
import pandas as pd
|
24 |
import pydeck as pdk
|
25 |
import streamlit as st
|
26 |
|
27 |
+
import ibis
|
28 |
+
from ibis import _
|
29 |
+
# -
|
30 |
+
|
31 |
# SETTING PAGE CONFIG TO WIDE MODE AND ADDING A TITLE AND FAVICON
|
32 |
+
st.set_page_config(layout="wide", page_title="GBIF Biodiversity Demo", page_icon=":butterfly:")
|
33 |
+
|
34 |
|
35 |
|
36 |
# LOAD DATA ONCE
|
37 |
@st.cache_resource
def load_data():
    """Open the butterfly occurrence table through an in-process DuckDB connection.

    If ``butterflies_gb.parquet`` exists in the working directory it is read
    and returned as-is, with no filtering applied here. Otherwise the GBIF
    occurrence parquet on S3 is read, rows missing longitude, latitude or
    year are dropped, the rows are restricted to order "Lepidoptera" with
    country code "GB", and only the year and coordinate columns are kept.

    Returns:
        The table object produced by ibis for whichever source was used.
    """
    connection = ibis.duckdb.connect()
    local_copy = "butterflies_gb.parquet"

    if not os.path.isfile(local_copy):
        occurrences = connection.read_parquet(
            "s3://gbif-open-data-us-east-1/occurrence/2023-06-01/occurrence.parquet"
        )
        # Every map point needs coordinates and a year, so drop incomplete rows first.
        located = occurrences.dropna(
            ["decimallongitude", "decimallatitude", "year"], how="any"
        )
        butterflies = located.filter(
            [_.order == "Lepidoptera", _.countrycode == "GB"]
        )
        return butterflies.select(['year', 'decimallongitude', 'decimallatitude'])

    return connection.read_parquet(local_copy)
|
50 |
|
51 |
|
|
|
52 |
def map(data, lat, lon, zoom):
|
53 |
st.write(
|
54 |
pdk.Deck(
|
|
|
63 |
pdk.Layer(
|
64 |
"HexagonLayer",
|
65 |
data=data,
|
66 |
+
get_position=["decimallongitude", "decimallatitude"],
|
67 |
+
radius=1000,
|
68 |
+
elevation_scale=100,
|
|
|
69 |
pickable=True,
|
70 |
extruded=True,
|
71 |
),
|
|
|
74 |
)
|
75 |
|
76 |
|
77 |
+
# FILTER DATA FOR A SPECIFIC YEAR, CACHE
|
78 |
@st.cache_data
def filterdata(df, year):
    """Return the rows of ``df`` whose ``year`` column equals ``year``, as pandas.

    Args:
        df: Table exposing ``.filter(...)`` and a ``year`` column (the value
            returned by ``load_data``).
        year: Year to keep.

    Returns:
        The result of calling ``.to_pandas()`` on the filtered table.
    """
    # NOTE(review): st.cache_data builds its cache key from every argument,
    # including ``df`` -- confirm Streamlit can hash this table object.
    rows_for_year = df.filter(_.year == year)
    return rows_for_year.to_pandas()
|
81 |
|
82 |
|
83 |
# CALCULATE MIDPOINT FOR GIVEN SET OF DATA
|
|
|
86 |
return (np.average(lat), np.average(lon))
|
87 |
|
88 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
89 |
|
90 |
# STREAMLIT APP LAYOUT
|
91 |
data = load_data()
|
|
|
98 |
# E.G. https://<app-url>/?year=2015
# Runs once per session: seed the slider's session-state entry from the URL.
if not st.session_state.get("url_synced", False):
    try:
        # st.query_params yields the value as a string, so convert the whole
        # value; indexing it with [0] would keep only its first character.
        url_year = int(st.query_params["year"])
    except (KeyError, ValueError):
        # No "year" parameter, or it is not an integer: keep the slider default.
        pass
    else:
        st.session_state["year"] = url_year
        st.session_state["url_synced"] = True
|
|
|
107 |
|
108 |
# IF THE SLIDER CHANGES, UPDATE THE QUERY PARAM
|
109 |
def update_query_params():
    """Mirror the value stored under session-state key "year" into the "year" query parameter."""
    st.query_params["year"] = st.session_state["year"]
|
112 |
|
113 |
|
114 |
with row1_1:
    # NOTE(review): the title still names the Uber demo this app was adapted
    # from -- confirm the intended heading.
    st.title("NYC Uber Ridesharing Data")
    # Bind the slider value to ``year``: that is the name the map section
    # further down reads. It was previously bound to ``hour_selected``, which
    # left ``year`` undefined. The widget key "year" also exposes the value as
    # st.session_state["year"] for update_query_params.
    year = st.slider(
        "Select year", 2010, 2023, key="year", on_change=update_query_params
    )
|
119 |
|
120 |
|
|
|
122 |
st.write(
|
123 |
"""
|
124 |
##
|
125 |
+
By sliding the slider on the left you can view different slices of time and explore different trends.
|
|
|
126 |
"""
|
127 |
)
|
128 |
|
129 |
# LAYING OUT THE MIDDLE SECTION OF THE APP WITH THE MAPS
row2_1, row2_2, row2_3, row2_4 = st.columns((2, 1, 1, 1))

# +
# HARD-CODED (LAT, LON) CENTRE SHARED BY EVERY MAP
midpoint = (52.0, -1.0)
# -

# The slider is created with key="year", so its current value is available in
# session state; read it here so this section does not depend on the name of
# the variable the slider result was assigned to.
year = st.session_state["year"]

# Every panel shows the same year, so filter once and reuse the frame.
year_data = filterdata(data, year)

# (column, heading, zoom level) for each panel, left to right.
panels = [
    (row2_1, f"""**All UK {year}**""", 4),
    (row2_2, "**Panel 1**", 3),
    (row2_3, "**Panel 2**", 2),
    (row2_4, "**Panel 3**", 1),
]

for column, heading, zoom in panels:
    with column:
        st.write(heading)
        map(year_data, midpoint[0], midpoint[1], zoom)
|
157 |
+
|
158 |
+
|
159 |
+
|
160 |
+
|
161 |
+
|
162 |
|
|
|
|
|
163 |
|
|
|
|
|
|
|
|
|
164 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
requirements.txt
CHANGED
@@ -3,3 +3,4 @@ numpy
|
|
3 |
pandas
|
4 |
pydeck
|
5 |
streamlit
|
|
|
|
3 |
pandas
|
4 |
pydeck
|
5 |
streamlit
|
6 |
+
ibis-framework[duckdb]
|