# GBIF Observations Explorer — Streamlit app that hex-bins GBIF occurrence
# records (pre-indexed with H3 cell ids) using DuckDB/ibis, renders them as a
# deck.gl H3HexagonLayer via leafmap, and offers an LLM-to-SQL chat query.
import streamlit as st
import leafmap.maplibregl as leafmap
import pandas as pd
import numpy as np
from matplotlib import cm
import ibis
from ibis import _
from huggingface_hub import HfApi, login
from langchain_openai import ChatOpenAI
from langchain_community.utilities import SQLDatabase
from langchain.chains import create_sql_query_chain

# +
## Benchmark possible access locations, but local blows everything else away
# h3_parquet = "https://huggingface.co/datasets/boettiger-lab/gbif/resolve/main/gbif_ca.geoparquet"

# Local parquet of GBIF records for the US Northeast (pre-computed H3 columns).
h3_parquet = "US_NE.parquet" # with more local storage space, full US works fine.
#h3_parquet = "/home/rstudio/source.coop/cboettig/gbif/gbif_us_h3.parquet"

# DuckDB connection via ibis; spatial + httpfs extensions cover geo ops and
# remote (https) parquet access respectively.
con = ibis.duckdb.connect(extensions=["spatial", "httpfs"])
# Lazy table expression over the parquet file, registered as "gbif_h3".
gbif_h3 = con.read_parquet(h3_parquet, "gbif_h3")
# -

st.set_page_config(page_title="GBIF Observations Explorer", layout="wide")

st.header("GBIF Observations Explorer", divider="rainbow")

## We have actually pre-calculated most of these so we don't need them here.
# These stubs bind DuckDB built-in functions (h3 community extension) so they
# can be used inside ibis expressions; the Python bodies are intentionally
# empty (`...`) because execution happens inside DuckDB.
@ibis.udf.scalar.builtin
def h3_latlng_to_cell(lat: float, lng: float, zoom: int) -> int:
    ...

# NOTE(review): shadows the Python builtin hex() — presumably intentional,
# since the function name must match the DuckDB builtin it binds to. Confirm
# before renaming; the name is only used as a column label elsewhere.
@ibis.udf.scalar.builtin
def hex(array) -> str:
    ...

@ibis.udf.scalar.builtin
def h3_cell_to_boundary_wkt (array) -> str:
    ...
## some versions need this manual install of h3 duckdb extension
# con.raw_sql('''
# INSTALL h3 FROM community;
# LOAD h3;
# ''')
# -

def filter_gbif(_df, species="Canis lupus", bbox=(-130., 30., -90., 60.)):
    """Filter a GBIF ibis table to one species within a lon/lat bounding box.

    Parameters
    ----------
    _df : ibis table expression with decimallongitude / decimallatitude /
        species columns (see gbif_h3 above).
    species : exact two-word binomial scientific name to match.
    bbox : (min_lon, min_lat, max_lon, max_lat); default spans the western
        US and Canada.

    Returns a lazy ibis expression — nothing executes until .to_pandas().
    """
    # NOTE: the default was previously a mutable list; an immutable tuple is
    # the safer, behaviorally identical idiom.
    return (_df
            .filter(_.decimallongitude >= bbox[0],
                    _.decimallongitude < bbox[2],
                    _.decimallatitude >= bbox[1],
                    _.decimallatitude < bbox[3],
                    _.species == species,
                    )
            )


def get_h3point_df(_df, resolution: int) -> pd.DataFrame:
    """Aggregate observations into H3 hexes at the given resolution.

    Counts rows per pre-computed hex-id column ("h" + resolution), log-scales
    the counts, normalizes to [0, 1], and attaches viridis colors both as an
    RGBA integer list ("rgb", for deck.gl) and as a hex string ("viridis_hex").
    """
    # FIX: annotated as int (was float) — str(6.0) would build the
    # nonexistent column name "h6.0"; all call sites pass ints.
    column = "h" + str(resolution)
    df = (_df
          .rename(hex=column)
          .group_by(_.hex)
          .agg(n=_.count())
          # .mutate(wkt = h3_cell_to_boundary_wkt(_.hex))
          .mutate(v=_.n.log())                        # log-scale the counts
          .mutate(normalized_values=_.v / _.v.max())  # rescale to [0, 1]
          .to_pandas()
          )
    rgb = cm.viridis(df.normalized_values)
    rgb_array = np.round(rgb * 255).astype(int).clip(0, 255).tolist()
    df['rgb'] = rgb_array
    # df['viridis_hex'] = colors.to_hex(rgb)  # not robust?
    df['viridis_hex'] = [
        f"#{int(c[0] * 255):02x}{int(c[1] * 255):02x}{int(c[2] * 255):02x}"
        for c in rgb
    ]
    return df


# +
login(st.secrets["HF_TOKEN"])

def host_df(df, filename="live.json", repo_id="boettiger-lab/gbif"):
    """Serialize df to JSON, upload it to a HF dataset repo, return its URL.

    The returned URL pins the commit hash of the upload so clients never see
    a stale cached copy from a previous run.
    """
    df.to_json(".static/" + filename, orient='records', indent=2)
    api = HfApi()
    info = api.upload_file(
        path_or_fileobj=".static/" + filename,
        path_in_repo="live/" + filename,
        repo_id=repo_id,
        repo_type="dataset",
    )
    # to avoid cache, use unique commit url
    commit_hash = info.oid
    # BUG FIX: the URL must end in the uploaded filename; it previously ended
    # in a literal placeholder and so never pointed at the uploaded file.
    return f"https://huggingface.co/datasets/{repo_id}/resolve/{commit_hash}/live/{filename}"


def hex_layer(m, df: pd.DataFrame, v_scale=1):
    """Add a deck.gl H3HexagonLayer for df (hosted via host_df) to map m."""
    url = host_df(df)
    deck_grid_layer = {
        "@@type": "H3HexagonLayer",
        "id": "my-layer",
        "data": url,
        "getHexagon": "@@=hex",
        "getFillColor": "@@=rgb",
        "getElevation": "@@=normalized_values",
        # UI slider picks v_scale in [-3, 3]; exponent gives a log-scale knob.
        "elevationScale": 5000 * 10 ** v_scale,
        "elevationRange": [0, 1],
    }
    return m.add_deck_layers([deck_grid_layer], "occurrences")


# +
# #%%time
def local_test():
    """Smoke test: render wolf occurrences near the Sierra on a local map."""
    bbox = [-120, 37, -118, 39]
    df = filter_gbif(gbif_h3, species="Canis lupus", bbox=bbox)
    df = get_h3point_df(df, 6)
    m = leafmap.Map(style="openstreetmap", center=(-121.4, 37.74), zoom=7,)
    hex_layer(m, df)
    return m

#local_test()

# +
import os
import streamlit as st

# leafmap reads the MapTiler key from the environment when the Map is built,
# so it must be set before constructing the map below.
os.environ["MAPTILER_KEY"] = st.secrets["MAPTILER_KEY"]

import leafmap.maplibregl as leafmap
m = leafmap.Map(style="positron", center=(-121.4, 37.50), zoom=7,)
#m

# +
# Set up Langchain SQL access
db = SQLDatabase.from_uri("duckdb:///tmp.duckdb", view_support=True)
db.run(f"create or replace view gbif_h3 as select * from read_parquet('{h3_parquet}');")
llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=0, api_key=st.secrets["OPENAI_API_KEY"])
chain = create_sql_query_chain(llm, db)

# FIXME Move additional advice into system prompt
example_question = "Show me all birds"
additional_advice = '''
. Return all matching columns using SELECT * in the query.
You must use only the space-separated two-word binomial scientific name as the "species" column, and not the "genus" column, such as "species"="Homo sapiens".
Avoid double quoting.
Do not use LIMIT, always return all results.
Do not include explanations of queries.
'''

#@st.cache_data
def manual_query(species, zoom):
    """Hex-bin GBIF records for one species (default bbox) at H3 zoom."""
    df = filter_gbif(gbif_h3, species)
    df = get_h3point_df(df, zoom)
    return df

#@st.cache_data
def chat_query(query, zoom):
    """Run LLM-generated SQL against DuckDB and hex-bin the result."""
    df = con.sql(query)
    df = get_h3point_df(df, zoom)
    return df


# Source/layer for the redlining vector data; currently only referenced by
# the commented-out add_source/add_layer calls below.
source = {
    "url": "https://data.source.coop/cboettig/us-boundaries/mappinginequality.json",
    "type": "vector",
}
layer = {
    "id": "mappinginequality",
    "source": "mappinginequality",
    "source-layer": "mappinginequality",
    "type": "fill",
    # NOTE(review): the MapLibre style spec spells this "minzoom" (no hyphen)
    # — confirm before enabling the add_layer() call below.
    "min-zoom": 15,
    "paint": {"fill-color": ["get", "fill"], "fill-opacity": 0.8},
}

# +
col1, col2, col3 = st.columns(3)

with col1:
    zoom = st.slider("H3 resolution", min_value=2, max_value=11, value=9)
    v_scale = st.slider("vertical scale", min_value=-3, max_value=3, value=0)

with col2:
    "🌍 Data Layers"  # streamlit "magic": a bare string renders as markdown
    if st.toggle("satellite"):
        m.add_basemap("satellite")
    if st.toggle("redlining"):
        # redlining = "https://dsl.richmond.edu/panorama/redlining/static/mappinginequality.json"
        # redlining = "https://dsl.richmond.edu/panorama/redlining/static/citiesData/CASanFrancisco1937/geojson.json"
        # redlining = "https://data.source.coop/cboettig/us-boundaries/mappinginequality.json"
        #redlining = "https://data.source.coop/cboettig/us-boundaries/mappinginequality.pmtiles"
        redlining = "https://dsl.richmond.edu/panorama/redlining/static/citiesData/CTNewHaven1937/geojson.json"
        paint = {"fill-color": ["get", "fill"], "fill-opacity": 0.8}
        m.add_geojson(redlining, layer_type="fill", name="redlining", paint=paint)
        # m.add_pmtiles(redlining, layer_type="fill", name = "redlining", paint=paint, fit_bounds = False)
        # m.add_source("mappinginequality", source)
        # m.add_layer(layer)

    # if st.toggle("Threatened Species Richness"):
    #     m.add_tile_layer(url="https://data.source.coop/cboettig/mobi/tiles/red/species-richness-all/{z}/{x}/{y}.png",
    #                      name="MOBI Species Richness",
    #                      attribution="NatureServe",
    #                      opacity=0.9
    #                      )

with col3:
    species = st.text_input("Species name:", "Canis latrans")
    df = manual_query(species, zoom)
    chatbox = st.container()
st.markdown("🦜 Or try our chat-based query:")

# Chat flow: user prompt -> langchain SQL chain -> SQL shown to the user ->
# executed via chat_query, replacing the df computed by the manual widgets.
if prompt := st.chat_input(example_question, key="chain"):
    st.chat_message("user").write(prompt)
    with st.chat_message("assistant"):
        # additional_advice is appended to steer the LLM's SQL generation.
        query = chain.invoke({"question": prompt + additional_advice})
        st.write(query)  # show the generated SQL so the user can inspect it
        df = chat_query(query, zoom)

# if st.button("refresh"):
#     chat_query.clear()

" "  # streamlit "magic": bare string renders as (blank) markdown spacing
st.divider()

# +
# with col2:
#     min_lng, max_lng = st.slider(
#         "Select longitude range",
#         min_value=-130.0,
#         max_value=-65.0,
#         value=(-128.0, -115.0),  # Default selected range
#         step=0.1)
#     min_lat, max_lat = st.slider(
#         "Select latitude range",
#         min_value=20.0,
#         max_value=70.0,
#         value=(30.0, 42.0),  # Default selected range
#         step=0.1)
# -

# +
# NOTE(review): `map` shadows the Python builtin map() for the rest of the
# script — consider renaming (e.g. map_container) in a follow-up.
map = st.container()
with map:
    # Render whichever df is current: the chat result if a prompt was
    # submitted this run, otherwise the manual species query.
    hex_layer(m, df, v_scale)
    m.add_layer_control(position="top-left")
    m.to_streamlit()

# +
st.divider()

'''
## Credits

DRAFT. Open Source Software developed at UC Berkeley.
'''