Spaces:
Sleeping
Sleeping
import streamlit as st | |
import leafmap.maplibregl as leafmap | |
import pandas as pd | |
import numpy as np | |
from matplotlib import cm | |
import ibis | |
from ibis import _ | |
from huggingface_hub import HfApi, login | |
from langchain_openai import ChatOpenAI | |
from langchain_community.utilities import SQLDatabase | |
from langchain.chains import create_sql_query_chain | |
# + | |
## Several data locations were benchmarked; a local file was far faster than the remote options.
# h3_parquet = "https://huggingface.co/datasets/boettiger-lab/gbif/resolve/main/gbif_ca.geoparquet"
# Parquet file that backs every query in this app (relative path).
h3_parquet = "US_NE.parquet"
# with more local storage space, full US works fine.
#h3_parquet = "/home/rstudio/source.coop/cboettig/gbif/gbif_us_h3.parquet"
# DuckDB connection (through ibis) with the "spatial" and "httpfs" extensions requested.
con = ibis.duckdb.connect(extensions=["spatial", "httpfs"])
# Register the parquet file on the connection under the table name "gbif_h3".
gbif_h3 = con.read_parquet(h3_parquet, "gbif_h3")
# -
# Page configuration is issued before any other Streamlit call in this file.
st.set_page_config(page_title="GBIF Observations Explorer", layout="wide")
st.header("GBIF Observations Explorer", divider="rainbow")
## We have actually pre-calculated most of these so we don't need them here. | |
def h3_latlng_to_cell(lat: float, lng: float, zoom: int) -> int:
    """Signature-only stub for converting a latitude/longitude pair to an H3 cell.

    The body is intentionally empty (``...``).
    NOTE(review): presumably bound to a database builtin by a decorator that is
    not visible in this listing -- confirm before calling it from Python.
    """
    ...
def hex(array) -> str:
    """Signature-only stub returning a string for ``array``.

    The body is intentionally empty (``...``).
    NOTE(review): this module-level name shadows the builtin ``hex``; presumably
    it is bound to a database builtin by a decorator not visible here -- confirm.
    """
    ...
def h3_cell_to_boundary_wkt (array) -> str:
    """Signature-only stub for turning an H3 cell into a boundary WKT string.

    The body is intentionally empty (``...``). Its only use in this file is in a
    commented-out ``mutate`` call.
    NOTE(review): presumably bound to a database builtin by a decorator that is
    not visible in this listing -- confirm.
    """
    ...
## some versions need this manual install of h3 duckdb extension | |
# con.raw_sql(''' | |
# INSTALL h3 FROM community; | |
# LOAD h3; | |
# ''') | |
# - | |
def filter_gbif(_df, species="Canis lupus", bbox=(-130., 30., -90., 60.)):
    """Restrict an occurrence table to one species inside a bounding box.

    Args:
        _df: table expression exposing ``decimallongitude``, ``decimallatitude``
            and ``species`` columns.
        species: value matched exactly against the ``species`` column.
        bbox: indexable of four numbers ordered
            ``(min_longitude, min_latitude, max_longitude, max_latitude)``.
            Lower bounds are inclusive, upper bounds are exclusive.

    Returns:
        The filtered table expression (``_df.filter(...)`` result).
    """
    # The default is an immutable tuple so it cannot be altered between calls;
    # lists passed by callers work the same way because bbox is only indexed.
    west, south, east, north = bbox[0], bbox[1], bbox[2], bbox[3]
    return _df.filter(
        _.decimallongitude >= west,
        _.decimallongitude < east,
        _.decimallatitude >= south,
        _.decimallatitude < north,
        _.species == species,
    )
def get_h3point_df(_df, resolution: int) -> pd.DataFrame:
    """Count records per H3 cell and attach height and colour columns.

    Args:
        _df: table expression that has a column named ``h<resolution>``
            (for example ``h6``) holding the cell identifier.
        resolution: H3 resolution used to select that column.

    Returns:
        A pandas DataFrame with one row per cell and the columns ``hex``,
        ``n`` (record count), ``v`` (``log(n)``), ``normalized_values``
        (``v`` divided by the largest ``v``), ``rgb`` (viridis colour as a list
        of 0-255 integers, one per colormap channel) and ``viridis_hex``
        (``#rrggbb`` string built from the same integers).
    """
    # int() keeps a float such as 6.0 from producing the column name "h6.0".
    column = f"h{int(resolution)}"
    df = (
        _df
        .rename(hex=column)
        .group_by(_.hex)
        .agg(n=_.count())
        # .mutate(wkt = h3_cell_to_boundary_wkt(_.hex))
        .mutate(v=_.n.log())
        .mutate(normalized_values=_.v / _.v.max())
        .to_pandas()
    )
    # If every cell holds a single record, v is 0 everywhere and the division
    # above is 0/0, which comes back as null/NaN. Treat those cells as height 0
    # so they still get a valid colour and a numeric elevation.
    df["normalized_values"] = df["normalized_values"].fillna(0.0)

    rgba = cm.viridis(df.normalized_values)
    rgb_array = np.round(rgba * 255).astype(int).clip(0, 255).tolist()
    df["rgb"] = rgb_array
    # Build the hex string from the rounded integers so it always agrees with
    # the "rgb" column (truncating the floats separately could differ by one).
    df["viridis_hex"] = [f"#{c[0]:02x}{c[1]:02x}{c[2]:02x}" for c in rgb_array]
    return df
# + | |
# Authenticate with the Hugging Face Hub at import time, using the token kept
# in Streamlit secrets under "HF_TOKEN".
login(st.secrets["HF_TOKEN"])
def host_df(df, filename="live.json", repo_id="boettiger-lab/gbif"):
    """Publish a DataFrame as JSON to a Hugging Face dataset repo and return its URL.

    The frame is written to ``.static/<filename>`` as a list of records, then
    uploaded to ``live/<filename>`` in the dataset repository ``repo_id``.

    Args:
        df: pandas DataFrame to serialise.
        filename: name of the JSON file, both locally and in the repository.
        repo_id: target dataset repository on the Hugging Face Hub.

    Returns:
        A ``resolve`` URL pinned to the commit created by the upload.
    """
    from pathlib import Path

    local_path = Path(".static") / filename
    # The output directory is not guaranteed to exist on a fresh checkout.
    local_path.parent.mkdir(parents=True, exist_ok=True)
    df.to_json(local_path, orient="records", indent=2)

    api = HfApi()
    info = api.upload_file(
        path_or_fileobj=str(local_path),
        path_in_repo="live/" + filename,
        repo_id=repo_id,
        repo_type="dataset",
    )
    # to avoid cache, use unique commit url
    commit_hash = info.oid
    return f"https://huggingface.co/datasets/{repo_id}/resolve/{commit_hash}/live/{filename}"
def hex_layer(m, df: pd.DataFrame, v_scale = 1):
    """Upload ``df`` and add it to map ``m`` as an H3 hexagon deck layer.

    Args:
        m: map object providing ``add_deck_layers``.
        df: frame with ``hex``, ``rgb`` and ``normalized_values`` columns,
            which the layer accessors below refer to by name.
        v_scale: power-of-ten exponent applied to the base elevation scale.

    Returns:
        Whatever ``m.add_deck_layers`` returns.
    """
    # The layer reads its data from the hosted JSON copy of the frame.
    data_url = host_df(df)
    elevation_scale = 5000 * 10 ** v_scale

    layer_spec = {
        "@@type": "H3HexagonLayer",
        "id": "my-layer",
        "data": data_url,
        "getHexagon": "@@=hex",
        "getFillColor": "@@=rgb",
        "getElevation": "@@=normalized_values",
        "elevationScale": elevation_scale,
        "elevationRange": [0,1],
    }
    layers = [layer_spec]
    return m.add_deck_layers(layers, "occurrences")
# + | |
# #%%time | |
def local_test():
    """Build a sample map of "Canis lupus" records at H3 resolution 6.

    Filters the module-level ``gbif_h3`` table to a fixed bounding box,
    aggregates it per cell and adds the result as a hex layer.

    Returns:
        The leafmap map with the layer added.
    """
    wolves = filter_gbif(
        gbif_h3,
        species="Canis lupus",
        bbox=[-120, 37, -118, 39],
    )
    cells = get_h3point_df(wolves, 6)

    sample_map = leafmap.Map(style="openstreetmap", center=(-121.4, 37.74), zoom=7)
    hex_layer(sample_map, cells)
    return sample_map
#local_test() | |
# + | |
# streamlit and leafmap.maplibregl are already imported at the top of the file;
# they are imported again here alongside os.
import os
import streamlit as st
# Copy the MapTiler key from Streamlit secrets into the process environment.
# NOTE(review): presumably leafmap reads MAPTILER_KEY for the "positron" style -- confirm.
os.environ["MAPTILER_KEY"] = st.secrets["MAPTILER_KEY"]
import leafmap.maplibregl as leafmap
# Base map shared by the rest of the app; layers are added to it further down.
m = leafmap.Map(style="positron", center=(-121.4, 37.50), zoom=7,)
#m | |
# + | |
# Set up Langchain SQL access
# LangChain gets its own DuckDB database file (tmp.duckdb); the view below
# exposes the same parquet file there under the name gbif_h3.
db = SQLDatabase.from_uri("duckdb:///tmp.duckdb", view_support=True)
db.run(f"create or replace view gbif_h3 as select * from read_parquet('{h3_parquet}');")
llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=0, api_key=st.secrets["OPENAI_API_KEY"])
# chain.invoke(...) is used later to turn a chat question into a query string.
chain = create_sql_query_chain(llm, db)
# FIXME Move additional advice into system prompt
# Placeholder text shown in the chat input box.
example_question = "Show me all birds"
# Appended verbatim to the user's question before it is sent to the chain,
# which is why the text starts with ". ".
additional_advice = '''
. Return all matching columns using SELECT * in the query.
You must use only the space-separated two-word binomial scientific name as the "species" column,
and not the "genus" column, such as "species"="Homo sapiens".
Avoid double quoting. Do not use LIMIT, always return all results.
Do not include explanations of queries.
'''
#@st.cache_data | |
def manual_query(species, zoom):
    """Return per-cell counts for ``species`` at H3 resolution ``zoom``.

    Filters the module-level ``gbif_h3`` table with the default bounding box
    of ``filter_gbif`` and aggregates the result with ``get_h3point_df``.
    """
    return get_h3point_df(filter_gbif(gbif_h3, species), zoom)
#@st.cache_data | |
def chat_query(query, zoom):
    """Run the SQL text ``query`` and aggregate the result per H3 cell.

    Args:
        query: SQL string executed on the module-level ``con`` connection.
        zoom: H3 resolution passed on to ``get_h3point_df``.

    Returns:
        The DataFrame produced by ``get_h3point_df``.
    """
    # NOTE(review): the caller passes model-generated SQL and it is executed
    # as-is, without validation.
    return get_h3point_df(con.sql(query), zoom)
# Vector source and fill-layer definitions for the "mappinginequality" data.
# In this file they are referenced only from commented-out
# m.add_source(...) / m.add_layer(...) calls.
source = {
    "url": "https://data.source.coop/cboettig/us-boundaries/mappinginequality.json",
    "type": "vector",
}
layer = {
    "id": "mappinginequality",
    "source": "mappinginequality",
    "source-layer": "mappinginequality",
    "type": "fill",
    # NOTE(review): MapLibre style layers spell this key "minzoom" -- confirm
    # whether "min-zoom" has any effect before re-enabling the layer.
    "min-zoom": 15,
    # Fill colour is taken from each feature's "fill" property.
    "paint": {"fill-color": ["get", "fill"], "fill-opacity": 0.8},
}
# + | |
# Control panel: three columns of widgets, followed by the chat-based query.
# NOTE(review): the nesting below was reconstructed from a flattened listing --
# confirm which statements belong inside each `with` block.
col1, col2, col3 = st.columns(3)
with col1:
    # zoom selects the h<zoom> column; v_scale is the elevation exponent.
    zoom = st.slider("H3 resolution", min_value=2, max_value=11, value=9)
    v_scale = st.slider("vertical scale", min_value=-3, max_value=3, value=0)
with col2:
    # NOTE(review): the leading character of this label looks like a
    # mis-decoded emoji -- restore the intended character from the original file.
    "π Data Layers"
    if st.toggle("satellite"):
        m.add_basemap("satellite")
    if st.toggle("redlining"):
        # Alternative sources that were tried; only the New Haven GeoJSON is active.
        # redlining = "https://dsl.richmond.edu/panorama/redlining/static/mappinginequality.json"
        # redlining = "https://dsl.richmond.edu/panorama/redlining/static/citiesData/CASanFrancisco1937/geojson.json"
        # redlining = "https://data.source.coop/cboettig/us-boundaries/mappinginequality.json"
        #redlining = "https://data.source.coop/cboettig/us-boundaries/mappinginequality.pmtiles"
        redlining = "https://dsl.richmond.edu/panorama/redlining/static/citiesData/CTNewHaven1937/geojson.json"
        paint = {"fill-color": ["get", "fill"], "fill-opacity": 0.8}
        m.add_geojson(redlining, layer_type="fill", name = "redlining", paint=paint)
        # m.add_pmtiles(redlining, layer_type="fill", name = "redlining", paint=paint, fit_bounds = False)
        # m.add_source("mappinginequality", source)
        # m.add_layer(layer)
    # if st.toggle("Threatened Species Richness"):
    #     m.add_tile_layer(url="https://data.source.coop/cboettig/mobi/tiles/red/species-richness-all/{z}/{x}/{y}.png",
    #                      name="MOBI Species Richness",
    #                      attribution="NatureServe",
    #                      opacity=0.9
    #                      )
with col3:
    species = st.text_input("Species name:", "Canis latrans")
    # Default result: the species typed in the text box.
    df = manual_query(species, zoom)
chatbox = st.container()
# NOTE(review): the leading characters of this text look like a mis-decoded
# emoji -- restore the intended character from the original file.
st.markdown("π¦ Or try our chat-based query:")
if prompt := st.chat_input(example_question, key="chain"):
    st.chat_message("user").write(prompt)
    with st.chat_message("assistant"):
        # The advice text is appended to the question; the generated query is
        # shown to the user and its result replaces the manual-query df.
        query = chain.invoke({"question": prompt + additional_advice})
        st.write(query)
        df = chat_query(query, zoom)
# if st.button("refresh"):
#     chat_query.clear()
" "
st.divider()
# + | |
# with col2: | |
# min_lng, max_lng = st.slider( | |
# "Select longitude range", | |
# min_value=-130.0, | |
# max_value=-65.0, | |
# value=(-128.0, -115.0), # Default selected range | |
# step=0.1) | |
# min_lat, max_lat = st.slider( | |
# "Select latitude range", | |
# min_value=20.0, | |
# max_value=70.0, | |
# value=(30.0, 42.0), # Default selected range | |
# step=0.1) | |
# - | |
# + | |
# Render the map: add the hex layer for the current df, then a layer control.
# NOTE(review): the name `map` shadows the builtin of the same name. The
# nesting below was reconstructed from a flattened listing -- confirm.
map = st.container()
with map:
    hex_layer(m, df, v_scale)
    m.add_layer_control(position="top-left")
    m.to_streamlit()
# + | |
# Footer: a divider followed by a bare string literal holding the credits text.
# NOTE(review): presumably rendered as markdown by Streamlit's handling of bare
# expressions -- confirm.
st.divider()
'''
## Credits
DRAFT. Open Source Software developed at UC Berkeley.
'''