cboettig's picture
drafts
4f08a04
import ibis
from ibis import _
import pydeck
# +
def connect_data():
    """Open a DuckDB connection through ibis and prepare it for the GB data.

    The setup script installs and loads ``httpfs``, loads the h3 extension
    from a local build path, points S3 access at the MinIO endpoint using
    path-style URLs, and defines the view ``gb`` over the parquet files
    under ``s3://shared-data/gbif_gb/``.

    Returns:
        The ibis DuckDB connection with the ``gb`` view defined.
    """
    statements = (
        "INSTALL httpfs;",
        "LOAD httpfs;",
        "LOAD 'build/release/extension/h3ext/h3ext.duckdb_extension';",
        "SET s3_url_style='path';",
        "SET s3_endpoint='minio.carlboettiger.info';",
        "CREATE VIEW gb AS SELECT * FROM read_parquet('s3://shared-data/gbif_gb/**');",
    )
    # One statement per line, wrapped in a leading and a trailing newline.
    setup_sql = "\n" + "\n".join(statements) + "\n"

    backend = ibis.duckdb.connect()
    backend.raw_sql(setup_sql)
    return backend
# Reference SQL kept as a bare string literal: it is not assigned or passed to
# anything, so it is never executed by this script.
# The script defines a view `gbif` over s3://gbif/*, then COPYs every row that
# has a non-null latitude and longitude and countrycode = 'US', adding hex-encoded
# h3 cell columns h3z1..h3z7, to s3://shared-data/gbif/US as parquet partitioned
# by h3z1.
# NOTE(review): the CREATE VIEW line appears before the httpfs INSTALL/LOAD and
# the S3 settings - presumably it would have to run after them; confirm before
# reusing this text.
'''
CREATE VIEW gbif AS SELECT * FROM read_parquet('s3://gbif/*');
INSTALL httpfs;
LOAD httpfs;
SET s3_url_style='path';
SET s3_endpoint='minio.carlboettiger.info';
SET temp_directory='/tmp/duckdb';
SET memory_limit = '150GB';
SET max_memory = '150GB';
COPY
(
SELECT *,
hex(h3_latlng_to_cell(gbif.decimallatitude, gbif.decimallongitude, 1)) as h3z1,
hex(h3_latlng_to_cell(gbif.decimallatitude, gbif.decimallongitude, 2)) as h3z2,
hex(h3_latlng_to_cell(gbif.decimallatitude, gbif.decimallongitude, 3)) as h3z3,
hex(h3_latlng_to_cell(gbif.decimallatitude, gbif.decimallongitude, 4)) as h3z4,
hex(h3_latlng_to_cell(gbif.decimallatitude, gbif.decimallongitude, 5)) as h3z5,
hex(h3_latlng_to_cell(gbif.decimallatitude, gbif.decimallongitude, 6)) as h3z6,
hex(h3_latlng_to_cell(gbif.decimallatitude, gbif.decimallongitude, 7)) as h3z7
FROM gbif
WHERE (NOT((decimallatitude IS NULL))) AND (NOT((decimallongitude IS NULL))) AND (countrycode = 'US')
) TO 's3://shared-data/gbif/US' (FORMAT 'parquet', PARTITION_BY h3z1);
'''
# Distinct Chordata (genus, species, class) rows per h3 cell, keeping the
# cell columns h3z2 through h3z7, cached to a local parquet file.
def richness_data(con):
    """Write the distinct Chordata taxon/cell rows of ``gb`` to ``gb-cache.parquet``.

    Args:
        con: ibis connection that exposes a table or view named ``gb``.

    Returns:
        Whatever ``to_parquet`` returns (presumably ``None``; the file on disk
        is the real output).
    """
    kept_columns = [
        _.genus,
        _.species,
        _["class"],
        _.h3z2,
        _.h3z3,
        _.h3z4,
        _.h3z5,
        _.h3z6,
        _.h3z7,
    ]
    chordates = con.table("gb").filter(_.phylum == "Chordata")
    unique_rows = chordates.select(*kept_columns).distinct()
    data = unique_rows.to_parquet("gb-cache.parquet")
    return data
# Module-level connection; zoom_data() reads this global `con` directly.
con = connect_data()
# Build the gb-cache.parquet file (the return value is not used).
richness_data(con)
# -
# +
def zoom_data(zoom=6):
    """Count cached rows per h3 cell and class at one zoom level, writing a CSV.

    Reads ``gb-cache.parquet`` through the module-level ``con``, renames the
    column ``h3z<zoom>`` to ``h3``, counts rows per (``h3``, ``class``) pair
    as ``n``, and writes the result to ``gbif-vert-gb-h3z<zoom>.csv``.

    Args:
        zoom: zoom level whose ``h3z<zoom>`` column must exist in the cache
            (richness_data keeps h3z2 through h3z7).

    Returns:
        Whatever ``to_csv`` returns (presumably ``None``).
    """
    hzoom = f"h3z{zoom!s}"
    cached = con.read_parquet("gb-cache.parquet")
    by_cell = cached.rename(h3=hzoom)
    counts = by_cell.group_by([_.h3, _["class"]]).aggregate(n=_.count())
    data = counts.to_csv(f"gbif-vert-gb-{hzoom}.csv")
    return data
def filterdata(df, year):
    """Return the rows of ``df`` whose ``year`` value equals ``year``.

    Args:
        df: object with a ``year`` attribute that supports boolean-mask
            indexing (presumably a pandas DataFrame).
        year: value to match against ``df.year``.

    Returns:
        The result of indexing ``df`` with the equality mask.
    """
    matches_year = df.year == year
    return df[matches_year]
# Write one per-cell, per-class count CSV for each zoom level from 4 to 7
# (columns h3z4..h3z7 of the cache); load_data/load_class read these files.
zoom_data(4)
zoom_data(5)
zoom_data(6)
zoom_data(7)
# +
def load_data(zoom=7):
    """Load per-cell totals for one zoom level as a pandas DataFrame.

    Reads ``gbif-vert-gb-h3z<zoom>.csv`` on a fresh DuckDB connection, sums
    ``n`` over all classes for each ``h3`` cell, and adds a ``color`` column
    equal to ``255 * n / max(n)``.

    Args:
        zoom: zoom level used to pick the CSV file.

    Returns:
        The result of ``to_pandas()`` with columns ``h3``, ``n`` and ``color``.
    """
    csv_path = f"gbif-vert-gb-h3z{zoom!s}.csv"
    backend = ibis.duckdb.connect()
    per_cell = backend.read_csv(csv_path).group_by(_.h3).aggregate(n=_.n.sum())
    # Scale the counts so that the largest cell maps to 255.
    scaled = per_cell.mutate(color=255 * _.n / _.n.max())
    return scaled.to_pandas()
def load_class(taxa="Amphibia", zoom=7):
    """Load the per-cell counts of a single class as a pandas DataFrame.

    Reads ``gbif-vert-gb-h3z<zoom>.csv`` on a fresh DuckDB connection, keeps
    the rows whose ``class`` column equals ``taxa``, and adds a ``color``
    column equal to ``255 * n / max(n)`` over the remaining rows.

    Args:
        taxa: value to match in the ``class`` column.
        zoom: zoom level used to pick the CSV file.

    Returns:
        The result of ``to_pandas()`` for the filtered, colour-scaled table.
    """
    csv_path = f"gbif-vert-gb-h3z{zoom!s}.csv"
    backend = ibis.duckdb.connect()
    one_class = backend.read_csv(csv_path).filter(_["class"] == taxa)
    # Scale the counts so that the largest remaining cell maps to 255.
    scaled = one_class.mutate(color=255 * _.n / _.n.max())
    return scaled.to_pandas()
# Per-cell totals at the default zoom (7); this frame is what gets mapped below.
df = load_data()
# Bare expression: presumably here so a notebook cell displays the frame;
# it has no effect when the file runs as a plain script.
df
# +
# Define a layer to display on a map
import pydeck as pdk
# Set the viewport location
# Initial camera for the deck; map() reads this module-level `view_state`.
# NOTE(review): the longitude/latitude look like central Great Britain,
# matching the "gb" data - confirm.
view_state = pdk.ViewState(
longitude=-1.415,
latitude=52.2323,
zoom=4,
min_zoom=1,
max_zoom=12,
pitch=40.5,
bearing=-27.36)
def map(data):
    """Render ``data`` as an extruded H3 hexagon layer and write ``hex_layer.html``.

    NOTE: this function's name shadows the builtin ``map`` for the rest of
    the module; it is kept because existing callers use it.

    Args:
        data: records passed to the pydeck layer; the layer reads the hexagon
            id from ``h3`` and both the elevation and the fill colour from
            ``color``.

    Returns:
        Whatever ``Deck.to_html`` returns for ``"hex_layer.html"``.
    """
    layer_options = dict(
        pickable=True,
        stroked=True,
        filled=True,
        extruded=True,
        elevation_scale=100,
        get_elevation='color',
        get_hexagon="h3",
        # Fill colour expression built from `color`, with fixed 30 and 160 components.
        get_fill_color="[color, 30, 255 - color, 160]",
        get_line_color=[255, 255, 255],
        line_width_min_pixels=2,
    )
    hex_layer = pdk.Layer("H3HexagonLayer", data, **layer_options)

    # Render using the module-level view_state as the initial camera.
    deck = pdk.Deck(layers=[hex_layer], initial_view_state=view_state)
    return deck.to_html("hex_layer.html")
# Render the per-cell totals in `df` to hex_layer.html.
map(df)