pgzmnk committed on
Commit
640dc7a
1 Parent(s): 4c8c6b4

Restructure repo. Show map is functional. Calculation is not yet functional.

Browse files
Files changed (3) hide show
  1. app.py +189 -195
  2. utils/duckdb_queries.py +30 -0
  3. utils/{js.py → gradio.py} +0 -0
app.py CHANGED
@@ -10,10 +10,10 @@ import pandas as pd
10
  import plotly.graph_objects as go
11
  import yaml
12
  import numpy as np
13
- from google.oauth2 import service_account
14
 
15
 
16
- from utils.js import get_window_url_params
 
17
 
18
  # Logging
19
  logging.basicConfig(format="%(levelname)s:%(message)s", level=logging.INFO)
@@ -47,25 +47,32 @@ class IndexGenerator:
47
  self,
48
  centroid,
49
  roi_radius,
50
- year,
51
  indices_file,
52
  project_name="",
53
  map=None,
54
  ):
 
 
 
 
 
 
 
 
55
  self.indices = self._load_indices(indices_file)
56
  self.centroid = centroid
57
  self.roi = ee.Geometry.Point(*centroid).buffer(roi_radius)
58
- self.year = year
59
- self.start_date = str(datetime.date(self.year, 1, 1))
60
- self.end_date = str(datetime.date(self.year, 12, 31))
61
- self.daterange = [self.start_date, self.end_date]
62
- self.project_name = project_name
63
  self.map = map
64
  if self.map is not None:
65
  self.show = True
66
  else:
67
  self.show = False
68
 
 
69
  def _cloudfree(self, gee_path):
70
  """
71
  Internal method to generate a cloud-free composite.
@@ -184,208 +191,195 @@ class IndexGenerator:
184
  df = pd.DataFrame(data)
185
  return df
186
 
 
 
 
 
 
 
 
 
 
 
 
187
 
188
- def set_up_duckdb():
189
- logging.info("set up duckdb")
190
- # use `climatebase` db
191
- if not os.getenv("motherduck_token"):
192
- raise Exception(
193
- "No motherduck token found. Please set the `motherduck_token` environment variable."
194
- )
195
- else:
196
- con = duckdb.connect("md:climatebase")
197
- con.sql("USE climatebase;")
198
-
199
- # load extensions
200
- con.sql("""INSTALL spatial; LOAD spatial;""")
201
-
202
- return con
203
 
 
204
 
205
- def authenticate_ee(ee_service_account):
206
- """
207
- Huggingface Spaces does not support secret files, therefore authenticate with an environment variable containing the JSON.
208
- """
209
- logging.info("authenticate_ee")
210
- credentials = ee.ServiceAccountCredentials(
211
- ee_service_account, key_data=os.environ["ee_service_account"]
212
- )
213
- ee.Initialize(credentials)
214
-
215
-
216
- def load_indices(indices_file):
217
- # Read index configurations
218
- with open(indices_file, "r") as stream:
219
- try:
220
- return yaml.safe_load(stream)
221
- except yaml.YAMLError as e:
222
- logging.error(e)
223
- return None
224
-
225
-
226
- def create_dataframe(years, project_name):
227
- dfs = []
228
- logging.info(years)
229
- indices = load_indices(INDICES_FILE)
230
- for year in years:
231
- logging.info(year)
232
- ig = IndexGenerator(
233
- centroid=LOCATION,
234
- roi_radius=ROI_RADIUS,
235
- year=year,
236
- indices_file=INDICES_FILE,
237
- project_name=project_name,
238
  )
239
- df = ig.generate_composite_index_df(list(indices.keys()))
240
- dfs.append(df)
241
- return pd.concat(dfs)
242
-
243
-
244
- # h/t: https://community.plotly.com/t/dynamic-zoom-for-mapbox/32658/12
245
- def get_plotting_zoom_level_and_center_coordinates_from_lonlat_tuples(
246
- longitudes=None, latitudes=None
247
- ):
248
- """Function documentation:\n
249
- Basic framework adopted from Krichardson under the following thread:
250
- https://community.plotly.com/t/dynamic-zoom-for-mapbox/32658/7
251
-
252
- # NOTE:
253
- # THIS IS A TEMPORARY SOLUTION UNTIL THE DASH TEAM IMPLEMENTS DYNAMIC ZOOM
254
- # in their plotly-functions associated with mapbox, such as go.Densitymapbox() etc.
 
 
 
 
 
 
 
 
 
 
 
 
 
255
 
256
- Returns the appropriate zoom-level for these plotly-mapbox-graphics along with
257
- the center coordinate tuple of all provided coordinate tuples.
258
- """
259
 
260
- # Check whether both latitudes and longitudes have been passed,
261
- # or if the list lenghts don't match
262
- if (latitudes is None or longitudes is None) or (len(latitudes) != len(longitudes)):
263
- # Otherwise, return the default values of 0 zoom and the coordinate origin as center point
264
- return 0, (0, 0)
265
-
266
- # Get the boundary-box
267
- b_box = {}
268
- b_box["height"] = latitudes.max() - latitudes.min()
269
- b_box["width"] = longitudes.max() - longitudes.min()
270
- b_box["center"] = (np.mean(longitudes), np.mean(latitudes))
271
-
272
- # get the area of the bounding box in order to calculate a zoom-level
273
- area = b_box["height"] * b_box["width"]
274
-
275
- # * 1D-linear interpolation with numpy:
276
- # - Pass the area as the only x-value and not as a list, in order to return a scalar as well
277
- # - The x-points "xp" should be in parts in comparable order of magnitude of the given area
278
- # - The zpom-levels are adapted to the areas, i.e. start with the smallest area possible of 0
279
- # which leads to the highest possible zoom value 20, and so forth decreasing with increasing areas
280
- # as these variables are antiproportional
281
- zoom = np.interp(
282
- x=area,
283
- xp=[0, 5**-10, 4**-10, 3**-10, 2**-10, 1**-10, 1**-5],
284
- fp=[20, 15, 14, 13, 12, 7, 5],
285
- )
286
 
287
- # Finally, return the zoom level and the associated boundary-box center coordinates
288
- return zoom, b_box["center"]
289
-
290
-
291
- def show_project_map(project_name):
292
- prepared_statement = con.execute(
293
- "SELECT geometry FROM project WHERE name = ? LIMIT 1", [project_name]
294
- ).fetchall()
295
- features = json.loads(prepared_statement[0][0].replace("'", '"'))["features"]
296
- geometry = features[0]["geometry"]
297
- longitudes = np.array(geometry["coordinates"])[0, :, 0]
298
- latitudes = np.array(geometry["coordinates"])[0, :, 1]
299
- (
300
- zoom,
301
- bbox_center,
302
- ) = get_plotting_zoom_level_and_center_coordinates_from_lonlat_tuples(
303
- longitudes, latitudes
304
- )
305
- fig = go.Figure(
306
- go.Scattermapbox(
307
- mode="markers",
308
- lon=[bbox_center[0]],
309
- lat=[bbox_center[1]],
310
- marker={"size": 20, "color": ["cyan"]},
 
 
 
311
  )
312
- )
313
-
314
- fig.update_layout(
315
- mapbox={
316
- "style": "stamen-terrain",
317
- "center": {"lon": bbox_center[0], "lat": bbox_center[1]},
318
- "zoom": zoom,
319
- "layers": [
320
- {
321
- "source": {
322
- "type": "FeatureCollection",
323
- "features": [{"type": "Feature", "geometry": geometry}],
324
- },
325
- "type": "fill",
326
- "below": "traces",
327
- "color": "royalblue",
328
- }
329
- ],
330
- },
331
- margin={"l": 0, "r": 0, "b": 0, "t": 0},
332
- )
333
-
334
- return fig
335
-
336
 
337
- # minMax.getInfo()
338
- def calculate_biodiversity_score(start_year, end_year, project_name):
339
- years = []
340
- for year in range(start_year, end_year):
341
- row_exists = con.execute(
342
- "SELECT COUNT(1) FROM bioindicator WHERE (year = ? AND project_name = ?)",
343
- [year, project_name],
344
- ).fetchall()[0][0]
345
- if not row_exists:
346
- years.append(year)
347
-
348
- if len(years) > 0:
349
- df = create_dataframe(years, project_name)
 
 
 
 
 
 
 
350
 
351
- # Write score table to `_temptable`
352
- con.sql(
353
- "CREATE OR REPLACE TABLE _temptable AS SELECT *, (value * area) AS score FROM (SELECT year, project_name, AVG(value) AS value, area FROM df GROUP BY year, project_name, area ORDER BY project_name)"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
354
  )
355
 
356
- # Create `bioindicator` table IF NOT EXISTS.
357
- con.sql(
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
358
  """
359
- USE climatebase;
360
- CREATE TABLE IF NOT EXISTS bioindicator (year BIGINT, project_name VARCHAR(255), value DOUBLE, area DOUBLE, score DOUBLE, CONSTRAINT unique_year_project_name UNIQUE (year, project_name));
361
- """
362
- )
363
- # UPSERT project record
364
- con.sql(
365
  """
366
- INSERT INTO bioindicator FROM _temptable
367
- ON CONFLICT (year, project_name) DO UPDATE SET value = excluded.value;
368
- """
369
- )
370
- logging.info("upsert records into motherduck")
371
- scores = con.execute(
372
- "SELECT * FROM bioindicator WHERE (year >= ? AND year <= ? AND project_name = ?)",
373
- [start_year, end_year, project_name],
374
- ).df()
375
- return scores
 
 
 
 
 
376
 
 
 
377
 
378
- def motherduck_list_projects(author_id):
379
- return con.execute(
380
- "SELECT DISTINCT name FROM project WHERE authorId = ? AND geometry != 'null'",
381
- [author_id],
382
- ).df()
383
 
384
 
385
- with gr.Blocks() as demo:
386
- # Environment setup
387
- authenticate_ee(GEE_SERVICE_ACCOUNT)
388
- con = set_up_duckdb()
389
  with gr.Column():
390
  m1 = gr.Plot()
391
  with gr.Row():
@@ -402,19 +396,19 @@ with gr.Blocks() as demo:
402
  label="Biodiversity scores by year",
403
  )
404
  calc_btn.click(
405
- calculate_biodiversity_score,
406
  inputs=[start_year, end_year, project_name],
407
  outputs=results_df,
408
  )
409
  view_btn.click(
410
- fn=show_project_map,
411
  inputs=[project_name],
412
  outputs=[m1],
413
  )
414
 
415
  def update_project_dropdown_list(url_params):
416
  username = url_params.get("username", "default")
417
- projects = motherduck_list_projects(author_id=username)
418
  # to-do: filter projects based on user
419
  return gr.Dropdown.update(choices=projects["name"].tolist())
420
 
 
10
  import plotly.graph_objects as go
11
  import yaml
12
  import numpy as np
 
13
 
14
 
15
+ from utils.gradio import get_window_url_params
16
+ from utils.duckdb_queries import list_projects_by_author, get_project_geometry
17
 
18
  # Logging
19
  logging.basicConfig(format="%(levelname)s:%(message)s", level=logging.INFO)
 
47
  self,
48
  centroid,
49
  roi_radius,
 
50
  indices_file,
51
  project_name="",
52
  map=None,
53
  ):
54
+
55
+
56
+ # Authenticate to GEE & DuckDB
57
+ self._authenticate_ee(GEE_SERVICE_ACCOUNT)
58
+ self.con = self._get_duckdb_conn()
59
+
60
+
61
+ # Set instance variables
62
  self.indices = self._load_indices(indices_file)
63
  self.centroid = centroid
64
  self.roi = ee.Geometry.Point(*centroid).buffer(roi_radius)
65
+ # self.start_date = str(datetime.date(self.year, 1, 1))
66
+ # self.end_date = str(datetime.date(self.year, 12, 31))
67
+ # self.daterange = [self.start_date, self.end_date]
68
+ # self.project_name = project_name
 
69
  self.map = map
70
  if self.map is not None:
71
  self.show = True
72
  else:
73
  self.show = False
74
 
75
+
76
  def _cloudfree(self, gee_path):
77
  """
78
  Internal method to generate a cloud-free composite.
 
191
  df = pd.DataFrame(data)
192
  return df
193
 
194
@staticmethod
def _get_duckdb_conn():
    """Open a DuckDB connection to the `climatebase` MotherDuck database.

    The connection is switched to the `climatebase` database and the
    `spatial` extension is installed and loaded before it is returned.

    Returns:
        The connection object returned by ``duckdb.connect``.

    Raises:
        Exception: if the `motherduck_token` environment variable is unset
            or empty.
    """
    logging.info("Configuring DuckDB connection...")

    # Fail fast: the `md:` connection string needs the token to be present.
    token = os.getenv("motherduck_token")
    if not token:
        raise Exception(
            "No motherduck token found. Please set the `motherduck_token` environment variable."
        )

    connection = duckdb.connect("md:climatebase")
    # Select the database first, then load the extensions.
    for statement in ("USE climatebase;", """INSTALL spatial; LOAD spatial;"""):
        connection.sql(statement)
    logging.info("Configured DuckDB connection.")

    return connection
211
 
212
@staticmethod
def _authenticate_ee(ee_service_account):
    """Initialize the Earth Engine client with service-account credentials.

    Huggingface Spaces does not support secret files, so the key material is
    read from the `ee_service_account` environment variable (containing the
    JSON) instead of from a key file.

    Args:
        ee_service_account: forwarded as the first positional argument of
            ``ee.ServiceAccountCredentials``.

    Raises:
        KeyError: if the `ee_service_account` environment variable is unset.
    """
    logging.info("Authenticating to Google Earth Engine...")
    key_json = os.environ["ee_service_account"]
    ee.Initialize(
        ee.ServiceAccountCredentials(ee_service_account, key_data=key_json)
    )
    logging.info("Authenticated to Google Earth Engine.")
223
+
224
+ def _create_dataframe(self, years, project_name):
225
+ dfs = []
226
+ logging.info(years)
227
+ indices = self._load_indices(INDICES_FILE)
228
+ for year in years:
229
+ logging.info(year)
230
+ ig = IndexGenerator(
231
+ centroid=LOCATION,
232
+ roi_radius=ROI_RADIUS,
233
+ year=year,
234
+ indices_file=INDICES_FILE,
235
+ project_name=project_name,
236
+ )
237
+ df = ig.generate_composite_index_df(list(indices.keys()))
238
+ dfs.append(df)
239
+ return pd.concat(dfs)
240
+
241
+ # h/t: https://community.plotly.com/t/dynamic-zoom-for-mapbox/32658/12
242
+ def _latlon_to_config(
243
+ self,
244
+ longitudes=None,
245
+ latitudes=None
246
+ ):
247
+ """Function documentation:\n
248
+ Basic framework adopted from Krichardson under the following thread:
249
+ https://community.plotly.com/t/dynamic-zoom-for-mapbox/32658/7
250
 
251
+ # NOTE:
252
+ # THIS IS A TEMPORARY SOLUTION UNTIL THE DASH TEAM IMPLEMENTS DYNAMIC ZOOM
253
+ # in their plotly-functions associated with mapbox, such as go.Densitymapbox() etc.
254
 
255
+ Returns the appropriate zoom-level for these plotly-mapbox-graphics along with
256
+ the center coordinate tuple of all provided coordinate tuples.
257
+ """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
258
 
259
+ # Check whether both latitudes and longitudes have been passed,
260
+ # or if the list lenghts don't match
261
+ if (latitudes is None or longitudes is None) or (
262
+ len(latitudes) != len(longitudes)
263
+ ):
264
+ # Otherwise, return the default values of 0 zoom and the coordinate origin as center point
265
+ return 0, (0, 0)
266
+
267
+ # Get the boundary-box
268
+ b_box = {}
269
+ b_box["height"] = latitudes.max() - latitudes.min()
270
+ b_box["width"] = longitudes.max() - longitudes.min()
271
+ b_box["center"] = (np.mean(longitudes), np.mean(latitudes))
272
+
273
+ # get the area of the bounding box in order to calculate a zoom-level
274
+ area = b_box["height"] * b_box["width"]
275
+
276
+ # * 1D-linear interpolation with numpy:
277
+ # - Pass the area as the only x-value and not as a list, in order to return a scalar as well
278
+ # - The x-points "xp" should be in parts in comparable order of magnitude of the given area
279
+ # - The zpom-levels are adapted to the areas, i.e. start with the smallest area possible of 0
280
+ # which leads to the highest possible zoom value 20, and so forth decreasing with increasing areas
281
+ # as these variables are antiproportional
282
+ zoom = np.interp(
283
+ x=area,
284
+ xp=[0, 5**-10, 4**-10, 3**-10, 2**-10, 1**-10, 1**-5],
285
+ fp=[20, 15, 14, 13, 12, 7, 5],
286
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
287
 
288
+ # Finally, return the zoom level and the associated boundary-box center coordinates
289
+ return zoom, b_box["center"]
290
+
291
def show_project_map(self, project_name):
    """Return a plotly figure showing the project's polygon on a map.

    The geometry is fetched with ``get_project_geometry``, the first
    feature's polygon is drawn as a fill layer, and a marker is placed at
    the center computed by ``_latlon_to_config``.

    Args:
        project_name: name of the project to look up.

    Returns:
        A ``plotly.graph_objects.Figure``.

    Raises:
        ValueError: if no geometry row is returned for ``project_name``.
    """
    # A leftover `breakpoint()` used to sit here; it would stall the
    # Gradio callback waiting for a debugger, so it has been removed.
    rows = get_project_geometry(project_name)
    if not rows:
        raise ValueError(f"No geometry found for project {project_name!r}.")

    # The stored text uses single quotes; swap them so json.loads accepts it.
    features = json.loads(rows[0][0].replace("'", '"'))["features"]
    geometry = features[0]["geometry"]

    # Index 0 of the coordinates array is used; columns 0 and 1 are read
    # as longitude and latitude.
    ring = np.array(geometry["coordinates"])[0]
    longitudes = ring[:, 0]
    latitudes = ring[:, 1]
    zoom, bbox_center = self._latlon_to_config(longitudes, latitudes)
    center_lon, center_lat = bbox_center[0], bbox_center[1]

    fig = go.Figure(
        go.Scattermapbox(
            mode="markers",
            lon=[center_lon],
            lat=[center_lat],
            marker={"size": 20, "color": ["cyan"]},
        )
    )

    fig.update_layout(
        mapbox={
            "style": "stamen-terrain",
            "center": {"lon": center_lon, "lat": center_lat},
            "zoom": zoom,
            "layers": [
                {
                    "source": {
                        "type": "FeatureCollection",
                        "features": [{"type": "Feature", "geometry": geometry}],
                    },
                    "type": "fill",
                    "below": "traces",
                    "color": "royalblue",
                }
            ],
        },
        margin={"l": 0, "r": 0, "b": 0, "t": 0},
    )

    return fig
330
+
331
def calculate_biodiversity_score(self, start_year, end_year, project_name):
    """Compute missing yearly scores for a project and return stored scores.

    Years in ``range(start_year, end_year)`` that have no row in the
    `bioindicator` table are computed with ``_create_dataframe`` and
    upserted; the rows for the requested span are then read back.

    Args:
        start_year: first year to consider.
        end_year: upper bound of the span (see the review note below).
        project_name: project whose scores are computed and returned.

    Returns:
        DataFrame of `bioindicator` rows with
        ``start_year <= year <= end_year`` for ``project_name``.
    """
    # Create `bioindicator` IF NOT EXISTS *before* it is queried, so the
    # existence check below cannot fail on a fresh database.
    self.con.sql(
        """
        USE climatebase;
        CREATE TABLE IF NOT EXISTS bioindicator (year BIGINT, project_name VARCHAR(255), value DOUBLE, area DOUBLE, score DOUBLE, CONSTRAINT unique_year_project_name UNIQUE (year, project_name));
        """
    )

    # NOTE(review): range() excludes `end_year`, while the final SELECT
    # includes it (`year <= ?`) - confirm which bound is intended.
    years = []
    for year in range(start_year, end_year):
        # Use the instance connection; there is no module-level `con`.
        row_exists = self.con.execute(
            "SELECT COUNT(1) FROM bioindicator WHERE (year = ? AND project_name = ?)",
            [year, project_name],
        ).fetchall()[0][0]
        if not row_exists:
            years.append(year)

    if years:
        # The query below reads `FROM df`, so this local must keep the
        # name `df` and stay in this frame.
        df = self._create_dataframe(years, project_name)  # noqa: F841

        # Write score table to `_temptable`
        self.con.sql(
            "CREATE OR REPLACE TABLE _temptable AS SELECT *, (value * area) AS score FROM (SELECT year, project_name, AVG(value) AS value, area FROM df GROUP BY year, project_name, area ORDER BY project_name)"
        )

        # UPSERT project record
        self.con.sql(
            """
            INSERT INTO bioindicator FROM _temptable
            ON CONFLICT (year, project_name) DO UPDATE SET value = excluded.value;
            """
        )
        logging.info("upsert records into motherduck")

    return self.con.execute(
        "SELECT * FROM bioindicator WHERE (year >= ? AND year <= ? AND project_name = ?)",
        [start_year, end_year, project_name],
    ).df()
369
+
370
+
371
+ # Instantiate outside gradio app to avoid re-initializing GEE, which is slow
372
+ indexgenerator = IndexGenerator(
373
+ centroid=LOCATION,
374
+ roi_radius=ROI_RADIUS,
375
+ indices_file=INDICES_FILE,
376
+ )
377
 
378
+ with gr.Blocks() as demo:
379
+ print("start gradio app")
380
 
 
 
 
 
 
381
 
382
 
 
 
 
 
383
  with gr.Column():
384
  m1 = gr.Plot()
385
  with gr.Row():
 
396
  label="Biodiversity scores by year",
397
  )
398
  calc_btn.click(
399
+ indexgenerator.calculate_biodiversity_score,
400
  inputs=[start_year, end_year, project_name],
401
  outputs=results_df,
402
  )
403
  view_btn.click(
404
+ fn=indexgenerator.show_project_map,
405
  inputs=[project_name],
406
  outputs=[m1],
407
  )
408
 
409
  def update_project_dropdown_list(url_params):
410
  username = url_params.get("username", "default")
411
+ projects = list_projects_by_author(author_id=username)
412
  # to-do: filter projects based on user
413
  return gr.Dropdown.update(choices=projects["name"].tolist())
414
 
utils/duckdb_queries.py ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import duckdb
3
+
4
+ import logging
5
+
6
+
7
# Module-level DuckDB connection used by the query helpers in this file.
# It is opened at import time against the `climatebase` MotherDuck database.
logging.info("Configuring DuckDB connection...")

# Guard clause: without the token there is nothing to connect with.
if not os.environ.get("motherduck_token"):
    raise Exception(
        "No motherduck token found. Please set the `motherduck_token` environment variable."
    )

con = duckdb.connect("md:climatebase")
con.sql("USE climatebase;")

# Install and load the spatial extension on the connection.
con.sql("""INSTALL spatial; LOAD spatial;""")
logging.info("Configured DuckDB connection.")
21
+
22
+
23
def list_projects_by_author(author_id):
    """Return the distinct project names for one author as a DataFrame.

    Only `project` rows whose `authorId` equals ``author_id`` and whose
    `geometry` is not the string 'null' are considered.
    """
    query = "SELECT DISTINCT name FROM project WHERE authorId = ? AND geometry != 'null'"
    result = con.execute(query, [author_id])
    return result.df()
28
+
29
def get_project_geometry(project_name):
    """Fetch the `geometry` column of at most one `project` row by name.

    Returns:
        The ``fetchall()`` result: a list containing zero or one
        single-column row.
    """
    query = "SELECT geometry FROM project WHERE name = ? LIMIT 1"
    cursor = con.execute(query, [project_name])
    return cursor.fetchall()
utils/{js.py → gradio.py} RENAMED
File without changes