cboettig's picture
species richness
1fe16a2
raw
history blame
3.99 kB
# -*- coding: utf-8 -*-
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""An example of showing species richness from GBIF data."""
import os
# +
import altair as alt
import numpy as np
import pandas as pd
import pydeck as pdk
import streamlit as st
import ibis
from ibis import _
# -
# PAGE CONFIG: WIDE LAYOUT, BROWSER-TAB TITLE AND FAVICON
st.set_page_config(
    layout="wide",
    page_title="GBIF Biodiversity Demo",
    page_icon=":butterfly:",
)
# +
# LOAD DATA ONCE
@st.cache_resource
def load_data():
    """Build the per-hexagon, per-year species count table.

    Opens a DuckDB connection through ibis, registers the GBIF parquet files
    on the configured S3 endpoint as the view ``gb``, then counts the distinct
    ``(h3z6, species, year)`` rows falling in each ``(h3z6, year)`` group.

    Returns:
        A pandas DataFrame with the grouping columns ``h3z6`` and ``year``,
        the count ``n``, and ``color`` computed as ``255 * n / max(n)``.
    """
    duck = ibis.duckdb.connect()
    # The SQL text is sent as-is: enable httpfs, point DuckDB at the
    # path-style S3 endpoint, and expose the parquet files as view "gb".
    duck.raw_sql('''
INSTALL httpfs;
LOAD httpfs;
SET s3_url_style='path';
SET s3_endpoint='minio.carlboettiger.info';
CREATE VIEW gb AS SELECT * FROM read_parquet('s3://shared-data/gbif_gb/**');
''')

    resolution = 6
    hex_col = f"h3z{resolution}"

    # One row per (hexagon, species, year) combination.
    occurrences = (
        duck.table("gb")
        .select(_[hex_col], _.species, _.year)
        .distinct()
    )
    # Number of those rows per (hexagon, year).
    richness = occurrences.group_by([_[hex_col], _.year]).aggregate(n=_.count())
    # Rescale the counts against the largest one so they fit a 0-255 channel.
    shaded = richness.mutate(color=255 * _.n / _.n.max())
    return shaded.to_pandas()
@st.cache_data
def filterdata(df, year):
    """Return the rows of *df* whose ``year`` column equals *year*."""
    in_year = df.year == year
    return df[in_year]
# -
def map(data, lat, lon, zoom):
    """Write a pydeck map with one extruded H3 hexagon layer to the page.

    NOTE: the name shadows the builtin ``map``; it is kept because the
    script calls this function by that name.

    Args:
        data: Records handed to the pydeck layer; the layer reads the
            ``h3z6`` and ``color`` fields from them.
        lat: Latitude of the initial view.
        lon: Longitude of the initial view.
        zoom: Initial zoom level of the view.
    """
    camera = {
        "latitude": lat,
        "longitude": lon,
        "zoom": zoom,
        "pitch": 50,
    }
    hexagons = pdk.Layer(
        "H3HexagonLayer",
        data,
        pickable=True,
        stroked=True,
        filled=True,
        extruded=True,
        elevation_scale=200,
        # "color" drives both the extrusion height and the fill shade.
        get_elevation='color',
        get_hexagon="h3z6",  # set by zoom
        get_fill_color="[color, 30, color]",
        get_line_color=[255, 255, 255],
        line_width_min_pixels=2,
    )
    deck = pdk.Deck(
        map_style="mapbox://styles/mapbox/light-v9",
        initial_view_state=camera,
        layers=[hexagons],
    )
    st.write(deck)
# CALCULATE MIDPOINT FOR GIVEN SET OF DATA
@st.cache_data
def mpoint(lat, lon):
    """Return the pair ``(np.average(lat), np.average(lon))``."""
    center_lat = np.average(lat)
    center_lon = np.average(lon)
    return center_lat, center_lon
# LAYING OUT THE TOP SECTION OF THE APP
row1_1, row1_2 = st.columns((2, 3))

# SEE IF THERE'S A QUERY PARAM IN THE URL (e.g. ?year=2020)
# THIS ALLOWS YOU TO PASS A STATEFUL URL TO SOMEONE WITH A SPECIFIC YEAR SELECTED.
# THE URL IS ONLY READ UNTIL A VALID YEAR HAS BEEN COPIED INTO SESSION STATE ONCE.
if not st.session_state.get("url_synced", False):
    try:
        # st.query_params maps each key to a single string, so the value is
        # parsed whole; indexing it with [0] would keep only its first
        # character (e.g. "2019" -> "2").
        year = int(st.query_params["year"])
    except (KeyError, ValueError):
        # No "year" parameter, or one that is not an integer: leave the
        # session state alone rather than crash the page.
        pass
    else:
        st.session_state["year"] = year
        st.session_state["url_synced"] = True
# IF THE SLIDER CHANGES, UPDATE THE QUERY PARAM
def update_query_params():
    """Copy the ``year`` value held in session state into the URL query params."""
    st.query_params["year"] = st.session_state["year"]
# LEFT COLUMN: TITLE AND YEAR SLIDER (STATE KEY "year", CALLBACK UPDATES THE URL)
with row1_1:
    st.title("GBIF Species Richness")
    year_selected = st.slider(
        label="Select year",
        min_value=2015,
        max_value=2023,
        key="year",
        on_change=update_query_params,
    )

# RIGHT COLUMN: SHORT USAGE HINT
with row1_2:
    st.write(
        """
##
By sliding the slider on the left you can view different slices of time and explore different trends.
"""
    )
# +
# LAYING OUT THE MIDDLE SECTION OF THE APP WITH THE MAPS
# -
# SETTING THE ZOOM LOCATIONS
# FIXED (LATITUDE, LONGITUDE) VIEW CENTER; mpoint(data["lat"], data["lon"]) IS NOT USED HERE
midpoint = (52.0, -1.0)

# STREAMLIT APP LAYOUT
data = load_data()

st.write("""**Large Map**""")

# DRAW ONLY THE ROWS FOR THE YEAR PICKED ON THE SLIDER, AT ZOOM LEVEL 4
map(filterdata(data, year_selected), *midpoint, 4)