Spaces:
Sleeping
Sleeping
so it begins...
Browse files- .gitignore +11 -0
- US_NE.parquet +3 -0
- app.py +278 -0
- requirements.txt +18 -0
.gitignore
ADDED
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
.Rproj.user
|
2 |
+
.Rhistory
|
3 |
+
.RData
|
4 |
+
.Ruserdata
|
5 |
+
.ipynb_checkpoints
|
6 |
+
*.Rproj
|
7 |
+
*.duckdb
|
8 |
+
*.wal
|
9 |
+
*.vrt
|
10 |
+
.streamlit
|
11 |
+
__pycache__
|
US_NE.parquet
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b1e5c6b7a344e1eac12daf0e9cd4cf6155df8c4943eb4fd9af0899d7db16cd2e
|
3 |
+
size 21506681033
|
app.py
ADDED
@@ -0,0 +1,278 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import leafmap.maplibregl as leafmap
|
3 |
+
import pandas as pd
|
4 |
+
import numpy as np
|
5 |
+
from matplotlib import cm
|
6 |
+
import ibis
|
7 |
+
from ibis import _
|
8 |
+
from huggingface_hub import HfApi, login
|
9 |
+
from langchain_openai import ChatOpenAI
|
10 |
+
from langchain_community.utilities import SQLDatabase
|
11 |
+
from langchain.chains import create_sql_query_chain
|
12 |
+
|
13 |
+
|
14 |
+
# +
|
15 |
+
## Benchmark possible access locations, but local blows everything else away
|
16 |
+
# h3_parquet = "https://huggingface.co/datasets/boettiger-lab/gbif/resolve/main/gbif_ca.geoparquet"
|
17 |
+
|
18 |
+
h3_parquet = "US_NE.parquet"
|
19 |
+
# with more local storage space, full US works fine.
|
20 |
+
#h3_parquet = "/home/rstudio/source.coop/cboettig/gbif/gbif_us_h3.parquet"
|
21 |
+
|
22 |
+
con = ibis.duckdb.connect(extensions=["spatial", "httpfs"])
|
23 |
+
gbif_h3 = con.read_parquet(h3_parquet, "gbif_h3")
|
24 |
+
# -
|
25 |
+
|
26 |
+
|
27 |
+
st.set_page_config(page_title="GBIF Observations Explorer", layout="wide")
|
28 |
+
st.header("GBIF Observations Explorer", divider="rainbow")
|
29 |
+
|
30 |
+
|
31 |
+
## We have actually pre-calculated most of these so we don't need them here.
|
32 |
+
@ibis.udf.scalar.builtin
|
33 |
+
def h3_latlng_to_cell(lat: float, lng: float, zoom: int) -> int:
|
34 |
+
...
|
35 |
+
|
36 |
+
@ibis.udf.scalar.builtin
|
37 |
+
def hex(array) -> str:
|
38 |
+
...
|
39 |
+
|
40 |
+
@ibis.udf.scalar.builtin
|
41 |
+
def h3_cell_to_boundary_wkt (array) -> str:
|
42 |
+
...
|
43 |
+
## some versions need this manual install of h3 duckdb extension
|
44 |
+
# con.raw_sql('''
|
45 |
+
# INSTALL h3 FROM community;
|
46 |
+
# LOAD h3;
|
47 |
+
# ''')
|
48 |
+
|
49 |
+
|
50 |
+
# -
|
51 |
+
|
52 |
+
def filter_gbif(_df, species="Canis lupus", bbox = [-130., 30., -90., 60.]):
    """Subset a GBIF ibis table to one species inside a lon/lat bounding box.

    Parameters
    ----------
    _df : ibis table with decimallongitude / decimallatitude / species columns.
    species : binomial scientific name to match exactly.
    bbox : [min_lng, min_lat, max_lng, max_lat]; lower bounds inclusive,
        upper bounds exclusive (matches the original half-open comparisons).
    """
    min_lng, min_lat, max_lng, max_lat = bbox
    return _df.filter(
        _.decimallongitude >= min_lng,
        _.decimallongitude < max_lng,
        _.decimallatitude >= min_lat,
        _.decimallatitude < max_lat,
        _.species == species,
    )
|
61 |
+
|
62 |
+
def get_h3point_df(_df, resolution: float) -> pd.DataFrame:
    """Aggregate records by H3 cell at `resolution` and attach viridis colors.

    Groups on the precomputed "h<resolution>" column, log-scales the counts,
    normalizes to [0, 1], and adds both an RGBA list column ("rgb") and a
    hex-string column ("viridis_hex") for deck.gl styling.

    Returns a pandas DataFrame with columns: hex, n, v, normalized_values,
    rgb, viridis_hex.
    """
    h3_column = "h" + str(resolution)
    pdf = (
        _df
        .rename(hex=h3_column)
        .group_by(_.hex)
        .agg(n=_.count())
        .mutate(v=_.n.log())
        .mutate(normalized_values=_.v / _.v.max())
        .to_pandas()
    )
    # Map normalized counts through the viridis colormap (rows are RGBA in 0..1).
    rgba = cm.viridis(pdf.normalized_values)
    pdf['rgb'] = np.round(rgba * 255).astype(int).clip(0, 255).tolist()
    # Hex string uses plain int() truncation, matching the original formatting.
    pdf['viridis_hex'] = [
        "#%02x%02x%02x" % (int(row[0] * 255), int(row[1] * 255), int(row[2] * 255))
        for row in rgba
    ]
    return pdf
|
79 |
+
|
80 |
+
|
81 |
+
# +
|
82 |
+
login(st.secrets["HF_TOKEN"])
|
83 |
+
def host_df(df, filename = "live.json", repo_id="boettiger-lab/gbif"):
    """Serialize `df` to JSON, upload it to a HF dataset repo, and return its URL.

    Writes the frame to .static/<filename> locally (assumes .static/ exists —
    TODO confirm), uploads it to live/<filename> in `repo_id`, and returns a
    commit-pinned download URL so browsers/CDNs never serve a stale copy.
    """
    local_path = ".static/" + filename
    df.to_json(local_path, orient='records', indent=2)
    api = HfApi()
    info = api.upload_file(
        path_or_fileobj=local_path,
        path_in_repo="live/" + filename,
        repo_id=repo_id,
        repo_type="dataset",
    )
    # To avoid caching, build a unique per-commit URL.
    commit_hash = info.oid
    # BUG FIX: the URL previously ended in the literal "live/(unknown)" and so
    # never pointed at the file uploaded to "live/" + filename above.
    return f"https://huggingface.co/datasets/{repo_id}/resolve/{commit_hash}/live/{filename}"
|
95 |
+
|
96 |
+
def hex_layer(m, df: pd.DataFrame, v_scale = 1):
    """Publish `df` via host_df and add it to map `m` as a deck.gl H3HexagonLayer.

    `v_scale` is an exponent: elevationScale = 5000 * 10**v_scale, so each
    slider step changes the extrusion height by a factor of ten.
    """
    data_url = host_df(df)

    layer_spec = {
        "@@type": "H3HexagonLayer",
        "id": "my-layer",
        "data": data_url,
        "getHexagon": "@@=hex",
        "getFillColor": "@@=rgb",
        "getElevation": "@@=normalized_values",
        "elevationScale": 5000 * 10 ** v_scale,
        "elevationRange": [0, 1],
    }
    return m.add_deck_layers([layer_spec], "occurrences")
|
110 |
+
|
111 |
+
|
112 |
+
# +
|
113 |
+
# #%%time
|
114 |
+
def local_test():
    """Smoke test: build a Canis lupus hex map over a small Sierra Nevada bbox."""
    test_bbox = [-120, 37, -118, 39]
    pts = filter_gbif(gbif_h3, species="Canis lupus", bbox=test_bbox)
    pts = get_h3point_df(pts, 6)
    m = leafmap.Map(style="openstreetmap", center=(-121.4, 37.74), zoom=7)
    hex_layer(m, pts)
    return m
|
121 |
+
|
122 |
+
|
123 |
+
#local_test()
|
124 |
+
|
125 |
+
# +
|
126 |
+
import os
|
127 |
+
import streamlit as st
|
128 |
+
os.environ["MAPTILER_KEY"] = st.secrets["MAPTILER_KEY"]
|
129 |
+
|
130 |
+
import leafmap.maplibregl as leafmap
|
131 |
+
m = leafmap.Map(style="positron", center=(-121.4, 37.50), zoom=7,)
|
132 |
+
#m
|
133 |
+
|
134 |
+
|
135 |
+
# +
|
136 |
+
# Set up Langchain SQL access
|
137 |
+
db = SQLDatabase.from_uri("duckdb:///tmp.duckdb", view_support=True)
|
138 |
+
db.run(f"create or replace view gbif_h3 as select * from read_parquet('{h3_parquet}');")
|
139 |
+
llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=0, api_key=st.secrets["OPENAI_API_KEY"])
|
140 |
+
chain = create_sql_query_chain(llm, db)
|
141 |
+
|
142 |
+
# FIXME Move additional advice into system prompt
|
143 |
+
|
144 |
+
example_question = "Show me all birds"
|
145 |
+
additional_advice = '''
|
146 |
+
. Return all matching columns using SELECT * in the query.
|
147 |
+
You must use only the space-separated two-word binomial scientific name as the "species" column,
|
148 |
+
and not the "genus" column, such as "species"="Homo sapiens".
|
149 |
+
Avoid double quoting. Do not use LIMIT, always return all results.
|
150 |
+
Do not include explanations of queries.
|
151 |
+
'''
|
152 |
+
|
153 |
+
|
154 |
+
#@st.cache_data
|
155 |
+
def manual_query(species, zoom):
    """Filter the global GBIF table to `species` and bin it at H3 resolution `zoom`."""
    matches = filter_gbif(gbif_h3, species)
    return get_h3point_df(matches, zoom)
|
159 |
+
|
160 |
+
|
161 |
+
#@st.cache_data
|
162 |
+
def chat_query(query, zoom):
    """Run SQL against the DuckDB connection and bin the result at H3 resolution `zoom`.

    NOTE(review): `query` is LLM-generated SQL executed verbatim against the
    shared connection — confirm the connection is confined to read-only data
    before trusting this with untrusted prompts.
    """
    result = con.sql(query)
    return get_h3point_df(result, zoom)
|
166 |
+
|
167 |
+
|
168 |
+
source = {
|
169 |
+
"url": "https://data.source.coop/cboettig/us-boundaries/mappinginequality.json",
|
170 |
+
"type": "vector",
|
171 |
+
}
|
172 |
+
|
173 |
+
layer = {
|
174 |
+
"id": "mappinginequality",
|
175 |
+
"source": "mappinginequality",
|
176 |
+
"source-layer": "mappinginequality",
|
177 |
+
"type": "fill",
|
178 |
+
"min-zoom": 15,
|
179 |
+
"paint": {"fill-color": ["get", "fill"], "fill-opacity": 0.8},
|
180 |
+
}
|
181 |
+
|
182 |
+
|
183 |
+
|
184 |
+
# +
|
185 |
+
col1, col2, col3 = st.columns(3)
|
186 |
+
|
187 |
+
with col1:
|
188 |
+
zoom = st.slider("H3 resolution", min_value=2, max_value=11, value=9)
|
189 |
+
v_scale = st.slider("vertical scale", min_value=-3, max_value=3, value=0)
|
190 |
+
|
191 |
+
with col2:
|
192 |
+
"π Data Layers"
|
193 |
+
|
194 |
+
if st.toggle("satellite"):
|
195 |
+
m.add_basemap("satellite")
|
196 |
+
|
197 |
+
if st.toggle("redlining"):
|
198 |
+
# redlining = "https://dsl.richmond.edu/panorama/redlining/static/mappinginequality.json"
|
199 |
+
# redlining = "https://dsl.richmond.edu/panorama/redlining/static/citiesData/CASanFrancisco1937/geojson.json"
|
200 |
+
# redlining = "https://data.source.coop/cboettig/us-boundaries/mappinginequality.json"
|
201 |
+
#redlining = "https://data.source.coop/cboettig/us-boundaries/mappinginequality.pmtiles"
|
202 |
+
redlining = "https://dsl.richmond.edu/panorama/redlining/static/citiesData/CTNewHaven1937/geojson.json"
|
203 |
+
|
204 |
+
paint = {"fill-color": ["get", "fill"], "fill-opacity": 0.8}
|
205 |
+
m.add_geojson(redlining, layer_type="fill", name = "redlining", paint=paint)
|
206 |
+
# m.add_pmtiles(redlining, layer_type="fill", name = "redlining", paint=paint, fit_bounds = False)
|
207 |
+
# m.add_source("mappinginequality", source)
|
208 |
+
# m.add_layer(layer)
|
209 |
+
# if st.toggle("Threatened Species Richness"):
|
210 |
+
# m.add_tile_layer(url="https://data.source.coop/cboettig/mobi/tiles/red/species-richness-all/{z}/{x}/{y}.png",
|
211 |
+
# name="MOBI Species Richness",
|
212 |
+
# attribution="NatureServe",
|
213 |
+
# opacity=0.9
|
214 |
+
# )
|
215 |
+
|
216 |
+
with col3:
|
217 |
+
species = st.text_input("Species name:", "Canis latrans")
|
218 |
+
df = manual_query(species, zoom)
|
219 |
+
|
220 |
+
chatbox = st.container()
|
221 |
+
st.markdown("π¦ Or try our chat-based query:")
|
222 |
+
if prompt := st.chat_input(example_question, key="chain"):
|
223 |
+
st.chat_message("user").write(prompt)
|
224 |
+
with st.chat_message("assistant"):
|
225 |
+
query = chain.invoke({"question": prompt + additional_advice})
|
226 |
+
st.write(query)
|
227 |
+
df = chat_query(query, zoom)
|
228 |
+
# if st.button("refresh"):
|
229 |
+
# chat_query.clear()
|
230 |
+
|
231 |
+
" "
|
232 |
+
st.divider()
|
233 |
+
|
234 |
+
|
235 |
+
|
236 |
+
# +
|
237 |
+
# with col2:
|
238 |
+
# min_lng, max_lng = st.slider(
|
239 |
+
# "Select longitude range",
|
240 |
+
# min_value=-130.0,
|
241 |
+
# max_value=-65.0,
|
242 |
+
# value=(-128.0, -115.0), # Default selected range
|
243 |
+
# step=0.1)
|
244 |
+
# min_lat, max_lat = st.slider(
|
245 |
+
# "Select latitude range",
|
246 |
+
# min_value=20.0,
|
247 |
+
# max_value=70.0,
|
248 |
+
# value=(30.0, 42.0), # Default selected range
|
249 |
+
# step=0.1)
|
250 |
+
# -
|
251 |
+
|
252 |
+
|
253 |
+
|
254 |
+
|
255 |
+
|
256 |
+
# +
|
257 |
+
map = st.container()
|
258 |
+
|
259 |
+
with map:
|
260 |
+
hex_layer(m, df, v_scale)
|
261 |
+
m.add_layer_control(position="top-left")
|
262 |
+
m.to_streamlit()
|
263 |
+
|
264 |
+
|
265 |
+
|
266 |
+
# +
|
267 |
+
|
268 |
+
|
269 |
+
st.divider()
|
270 |
+
|
271 |
+
'''
|
272 |
+
|
273 |
+
## Credits
|
274 |
+
|
275 |
+
DRAFT. Open Source Software developed at UC Berkeley.
|
276 |
+
|
277 |
+
'''
|
278 |
+
|
requirements.txt
ADDED
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
duckdb==1.0.0
|
2 |
+
pandas==2.2.2
|
3 |
+
git+https://github.com/eodaGmbH/py-maplibregl@feature/color-utils
|
4 |
+
leafmap[maplibre]
|
5 |
+
ibis-framework[duckdb]==9.1.0
|
6 |
+
streamlit==1.35.0
|
7 |
+
streamlit_folium==0.20.0
|
8 |
+
altair==5.3.0
|
9 |
+
referencing==0.35.1
|
10 |
+
rasterio==1.3.10
|
11 |
+
shapely==2.0.4
|
12 |
+
shiny==0.10.2
|
13 |
+
huggingface_hub
|
14 |
+
duckdb-engine
|
15 |
+
langchain
|
16 |
+
langchain-community
|
17 |
+
langchain-openai
|
18 |
+
streamlit
|