Spaces:
Sleeping
Sleeping
import ibis | |
from ibis import _ | |
import pydeck | |
# + | |
def connect_data():
    """Open a DuckDB connection through ibis and define the ``gb`` view.

    The setup script installs and loads ``httpfs``, loads the H3 extension
    from a local build path, switches S3 access to path-style URLs against
    the ``minio.carlboettiger.info`` endpoint, and creates the view ``gb``
    over the parquet files under ``s3://shared-data/gbif_gb/**``.

    Returns:
        The ibis DuckDB connection on which the script was run.
    """
    setup_sql = '''
INSTALL httpfs;
LOAD httpfs;
LOAD 'build/release/extension/h3ext/h3ext.duckdb_extension';
SET s3_url_style='path';
SET s3_endpoint='minio.carlboettiger.info';
CREATE VIEW gb AS SELECT * FROM read_parquet('s3://shared-data/gbif_gb/**');
'''
    backend = ibis.duckdb.connect()
    backend.raw_sql(setup_sql)
    return backend
# Reference SQL, kept as a bare string literal: nothing in this script
# executes it. It defines a `gbif` view over s3://gbif/*, adds H3 cell
# columns h3z1..h3z7 computed from the decimal latitude/longitude, keeps rows
# with non-null coordinates and countrycode 'US', and copies the result to
# s3://shared-data/gbif/US as parquet partitioned by h3z1.
# NOTE(review): presumably the gbif_gb data read by connect_data() was
# produced by an analogous query for GB -- confirm.
'''
CREATE VIEW gbif AS SELECT * FROM read_parquet('s3://gbif/*');
INSTALL httpfs;
LOAD httpfs;
SET s3_url_style='path';
SET s3_endpoint='minio.carlboettiger.info';
SET temp_directory='/tmp/duckdb';
SET memory_limit = '150GB';
SET max_memory = '150GB';
COPY
(
SELECT *,
hex(h3_latlng_to_cell(gbif.decimallatitude, gbif.decimallongitude, 1)) as h3z1,
hex(h3_latlng_to_cell(gbif.decimallatitude, gbif.decimallongitude, 2)) as h3z2,
hex(h3_latlng_to_cell(gbif.decimallatitude, gbif.decimallongitude, 3)) as h3z3,
hex(h3_latlng_to_cell(gbif.decimallatitude, gbif.decimallongitude, 4)) as h3z4,
hex(h3_latlng_to_cell(gbif.decimallatitude, gbif.decimallongitude, 5)) as h3z5,
hex(h3_latlng_to_cell(gbif.decimallatitude, gbif.decimallongitude, 6)) as h3z6,
hex(h3_latlng_to_cell(gbif.decimallatitude, gbif.decimallongitude, 7)) as h3z7
FROM gbif
WHERE (NOT((decimallatitude IS NULL))) AND (NOT((decimallongitude IS NULL))) AND (countrycode = 'US')
) TO 's3://shared-data/gbif/US' (FORMAT 'parquet', PARTITION_BY h3z1);
'''
# Distinct genus/species/class rows together with their H3 cell columns
# (h3z2 through h3z7), cached locally as parquet.
def richness_data(con):
    """Write the distinct Chordata records of the ``gb`` table to parquet.

    Filters ``gb`` to rows whose ``phylum`` is ``"Chordata"``, keeps the
    ``genus``, ``species``, ``class`` and ``h3z2``..``h3z7`` columns, drops
    duplicate rows, and writes the result to ``gb-cache.parquet``.

    Args:
        con: connection object exposing a ``gb`` table via ``con.table``.

    Returns:
        Whatever ``to_parquet`` returns for the written expression.
    """
    hex_columns = [_.h3z2, _.h3z3, _.h3z4, _.h3z5, _.h3z6, _.h3z7]
    chordates = con.table("gb").filter(_.phylum == "Chordata")
    records = chordates.select(_.genus, _.species, _["class"], *hex_columns)
    data = records.distinct().to_parquet("gb-cache.parquet")
    return data
# Build the parquet cache as soon as the script runs. `con` stays a
# module-level global because zoom_data() reads it without taking it as a
# parameter.
con = connect_data()
richness_data(con)
# - | |
# + | |
def zoom_data(zoom=6):
    """Write per-cell, per-class row counts for one H3 zoom level to CSV.

    Reads ``gb-cache.parquet`` through the module-level ``con``, renames the
    ``h3z<zoom>`` column to ``h3``, counts rows for each (``h3``, ``class``)
    pair as ``n``, and writes the result to ``gbif-vert-gb-h3z<zoom>.csv``.

    Args:
        zoom: suffix selecting which ``h3z*`` column to aggregate on.

    Returns:
        Whatever ``to_csv`` returns for the written expression.
    """
    hzoom = f"h3z{zoom}"
    cached = con.read_parquet("gb-cache.parquet")
    counts = (
        cached.rename(h3=hzoom)
        .group_by([_.h3, _["class"]])
        .aggregate(n=_.count())
    )
    data = counts.to_csv(f"gbif-vert-gb-{hzoom}.csv")
    return data
def filterdata(df, year):
    """Return the rows of ``df`` whose ``year`` value equals ``year``.

    Args:
        df: object with a ``year`` attribute that supports ``==`` and
            boolean-mask indexing (presumably a pandas DataFrame).
        year: value to match against ``df.year``.

    Returns:
        ``df`` indexed by the equality mask.
    """
    matches_year = df.year == year
    return df[matches_year]
# Write one aggregated CSV per zoom level, 4 through 7, in ascending order.
for level in (4, 5, 6, 7):
    zoom_data(level)
# + | |
def load_data(zoom=7):
    """Load total counts per H3 cell for one zoom level as a pandas object.

    Reads ``gbif-vert-gb-h3z<zoom>.csv`` on a fresh DuckDB connection, sums
    ``n`` within each ``h3`` value, and adds a ``color`` column computed as
    ``255 * n / max(n)``.

    Args:
        zoom: suffix selecting which per-zoom CSV file to read.

    Returns:
        The result of ``to_pandas()`` on the aggregated table, with the
        columns ``h3``, ``n`` and ``color``.
    """
    csv_path = f"gbif-vert-gb-h3z{zoom}.csv"
    backend = ibis.duckdb.connect()
    per_class = backend.read_csv(csv_path)
    totals = per_class.group_by(_.h3).aggregate(n=_.n.sum())
    # Scale counts so the largest cell gets 255.
    shaded = totals.mutate(color=255 * _.n / _.n.max())
    return shaded.to_pandas()
def load_class(taxa="Amphibia", zoom=7):
    """Load the counts for a single ``class`` value at one zoom level.

    Reads ``gbif-vert-gb-h3z<zoom>.csv`` on a fresh DuckDB connection, keeps
    the rows whose ``class`` equals ``taxa``, and adds a ``color`` column
    computed as ``255 * n / max(n)`` over the remaining rows.

    Args:
        taxa: value to match in the ``class`` column.
        zoom: suffix selecting which per-zoom CSV file to read.

    Returns:
        The result of ``to_pandas()`` on the filtered table.
    """
    csv_path = f"gbif-vert-gb-h3z{zoom}.csv"
    backend = ibis.duckdb.connect()
    one_class = backend.read_csv(csv_path).filter(_["class"] == taxa)
    # Scale counts so the largest remaining cell gets 255.
    shaded = one_class.mutate(color=255 * _.n / _.n.max())
    return shaded.to_pandas()
# All classes combined, at load_data()'s default zoom level.
df = load_data()
# Bare expression: displays the frame when run as an interactive cell and
# has no effect in a plain script run.
df
# + | |
# Define a layer to display on a map | |
import pydeck as pdk | |
# Initial camera for the deck: where it points, how far it may zoom, and
# its tilt (pitch) and rotation (bearing).
view_state = pdk.ViewState(
    latitude=52.2323,
    longitude=-1.415,
    zoom=4,
    min_zoom=1,
    max_zoom=12,
    pitch=40.5,
    bearing=-27.36,
)
def map(data):
    """Render ``data`` as an extruded H3 hexagon layer in ``hex_layer.html``.

    Each record's ``h3`` field supplies the hexagon and its ``color`` field
    drives both the fill colour and the extrusion height. The deck uses the
    module-level ``view_state`` as its initial view.

    NOTE: this function's name shadows the built-in ``map`` for the rest of
    the module.

    Args:
        data: records passed straight to the pydeck layer; the accessors
            used here expect ``h3`` and ``color`` fields.

    Returns:
        Whatever ``Deck.to_html("hex_layer.html")`` returns.
    """
    layer_options = dict(
        pickable=True,
        stroked=True,
        filled=True,
        extruded=True,
        elevation_scale=100,
        get_elevation='color',
        get_hexagon="h3",
        get_fill_color="[color, 30, 255 - color, 160]",
        get_line_color=[255, 255, 255],
        line_width_min_pixels=2,
    )
    hex_layer = pdk.Layer("H3HexagonLayer", data, **layer_options)
    deck = pdk.Deck(layers=[hex_layer], initial_view_state=view_state)
    return deck.to_html("hex_layer.html")
# Render the combined-classes frame loaded above; this calls the map()
# defined in this file, not the built-in.
map(df)