mbuuck committed on
Commit
12fd562
1 Parent(s): fae8684

Added coefficients/normalizations. Added rounding.

Browse files
app.py CHANGED
@@ -1,3 +1,4 @@
 
1
  import gradio as gr
2
 
3
  from utils import duckdb_queries as dq
@@ -5,13 +6,18 @@ from utils.gradio import get_window_url_params
5
  from utils.indicators import IndexGenerator
6
 
7
  # Instantiate outside gradio app to avoid re-initializing GEE, which is slow
8
- indexgenerator = IndexGenerator(indices=["NDWI", "Water", "Protected", "Habitat"])
 
 
 
 
9
 
10
  with gr.Blocks() as demo:
11
  with gr.Column():
12
  m1 = gr.Plot()
13
  with gr.Row():
14
  project_name = gr.Dropdown([], label="Project", value="Select project")
 
15
  start_year = gr.Number(value=2017, label="Start Year", precision=0)
16
  end_year = gr.Number(value=2022, label="End Year", precision=0)
17
  with gr.Row():
@@ -37,8 +43,7 @@ with gr.Blocks() as demo:
37
  projects = dq.list_projects_by_author(author_id=username)
38
  # Initialize the first project in the list
39
  project_names = projects['name'].tolist()
40
- default_project = project_names[0]
41
- return gr.Dropdown.update(choices=project_names, value=default_project)
42
 
43
  # Change the project name in the index generator object when the
44
  # user selects a new project
@@ -47,6 +52,12 @@ with gr.Blocks() as demo:
47
  inputs=project_name
48
  )
49
 
 
 
 
 
 
 
50
  # Get url params
51
  url_params = gr.JSON({"username": "default"}, visible=False, label="URL Params")
52
 
 
1
+ import os
2
  import gradio as gr
3
 
4
  from utils import duckdb_queries as dq
 
6
  from utils.indicators import IndexGenerator
7
 
8
  # Instantiate outside gradio app to avoid re-initializing GEE, which is slow
9
+ indexgenerator = IndexGenerator()
10
+
11
# Build the metric dropdown choices from the YAML files in `metrics/`.
# Only `.yaml` files are considered so stray entries (editor backups,
# `.DS_Store`, sub-directories) never become selectable metrics that
# `IndexGenerator.set_metric` could not load, and the list is sorted because
# `os.listdir` returns entries in arbitrary order.
# Underscores become spaces for display; `set_metric` maps them back when it
# rebuilds the file path.
metric_names = sorted(
    filename.split('.yaml')[0].replace('_', ' ')
    for filename in os.listdir('metrics')
    if filename.endswith('.yaml')
)
14
 
15
  with gr.Blocks() as demo:
16
  with gr.Column():
17
  m1 = gr.Plot()
18
  with gr.Row():
19
  project_name = gr.Dropdown([], label="Project", value="Select project")
20
+ metric = gr.Dropdown(metric_names, label='Metric', value='Select metric')
21
  start_year = gr.Number(value=2017, label="Start Year", precision=0)
22
  end_year = gr.Number(value=2022, label="End Year", precision=0)
23
  with gr.Row():
 
43
  projects = dq.list_projects_by_author(author_id=username)
44
  # Initialize the first project in the list
45
  project_names = projects['name'].tolist()
46
+ return gr.Dropdown.update(choices=project_names)
 
47
 
48
  # Change the project name in the index generator object when the
49
  # user selects a new project
 
52
  inputs=project_name
53
  )
54
 
55
+ # Set the metric to be calculated
56
+ metric.change(
57
+ indexgenerator.set_metric,
58
+ inputs=metric
59
+ )
60
+
61
  # Get url params
62
  url_params = gr.JSON({"username": "default"}, visible=False, label="URL Params")
63
 
metrics/OE_Biodiversity.yaml ADDED
@@ -0,0 +1,109 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ Protected:
3
+ name: Protected
4
+ description: The total PA extent, including both marine (if applicable) and terrestrial areas provided by data provider as specified in the legal text for the site.
5
+ unit: km^2
6
+ min: 0
7
+ max: roi_area
8
+ roi: ''
9
+ gee_path: WCMC/WDPA/current/polygons
10
+ gee_type: feature_collection
11
+ viz:
12
+ palette:
13
+ - 2ed033
14
+ - 5aff05
15
+ - 67b9ff
16
+ - 5844ff
17
+ - 0a7618
18
+ - 2c05ff
19
+ min: 0
20
+ max: 1550000
21
+ opacity: 0.8
22
+ select: REP_AREA
23
+ bandname: constant
24
+ coefficient: 1
25
+ show: true
26
+ Soil:
27
+ name: Soil
28
+ description: An estimate of soil organic carbon content at b{n} cm depth.
29
+ unit: 5g/kg
30
+ min: 0
31
+ max: 120
32
+ roi: ''
33
+ gee_path: OpenLandMap/SOL/SOL_ORGANIC-CARBON_USDA-6A1C_M/v02
34
+ gee_type: image
35
+ viz:
36
+ bands:
37
+ - b0
38
+ min: 0
39
+ max: 12
40
+ palette:
41
+ - ffffa0
42
+ - f7fcb9
43
+ - d9f0a3
44
+ - addd8e
45
+ - 78c679
46
+ - 41ab5d
47
+ - '238443'
48
+ - 005b29
49
+ - 004b29
50
+ - 012b13
51
+ - 00120b
52
+ select: b0
53
+ bandname: b0
54
+ coefficient: 1
55
+ show: false
56
+ NDVI:
57
+ name: NDVI
58
+ description: Normalized difference vegetation index
59
+ unit: index (continuous)
60
+ min: -1
61
+ max: 1
62
+ roi: ''
63
+ gee_path: LANDSAT/LC08/C02/T1
64
+ gee_type: algebraic
65
normalized_difference:
# Landsat 8 (LC08) bands: B4 = red, B5 = near infrared.
# NDVI = (NIR - red) / (NIR + red), so the pair must be B5, B4.
# (B4, B3 is the NIR/red pair of Landsat 5/7, not Landsat 8.)
# NOTE(review): assumes the pair is evaluated as
# (first - second) / (first + second) -- confirm against the loader.
- B5
- B4
68
+ viz:
69
+ min: -1
70
+ max: 1
71
+ palette:
72
+ - "#d73027"
73
+ - "#f46d43"
74
+ - "#fdae61"
75
+ - "#fee08b"
76
+ - "#d9ef8b"
77
+ - "#a6d96a"
78
+ - "#66bd63"
79
+ - "#1a9850"
80
+ bandname: nd
81
+ coefficient: 1
82
+ show: true
83
+ NDWI:
84
+ name: NDWI
85
+ description: An estimate of the water content of leaves.
86
+ unit: index (continuous)
87
+ min: -1
88
+ max: 1
89
+ roi: ''
90
+ gee_path: LANDSAT/LC08/C02/T1
91
+ gee_type: algebraic
92
+ normalized_difference:
93
+ - B5
94
+ - B6
95
+ viz:
96
+ min: -1
97
+ max: 1
98
+ palette:
99
+ - "#ece7f2"
100
+ - "#d0d1e6"
101
+ - "#a6bddb"
102
+ - "#74a9cf"
103
+ - "#3690c0"
104
+ - "#0570b0"
105
+ - "#045a8d"
106
+ - "#023858"
107
+ bandname: nd
108
+ coefficient: 1
109
+ show: true
indices.yaml → unused_metrics/OE_Biodiversity_20230723.yaml RENAMED
@@ -1,6 +1,10 @@
1
  ---
2
  Water:
3
  name: Water
 
 
 
 
4
  roi: ''
5
  gee_path: JRC/GSW1_1/GlobalSurfaceWater
6
  gee_type: image
@@ -13,9 +17,14 @@ Water:
13
  - 0000ff
14
  bandname: occurrence
15
  select: occurrence
 
16
  show: true
17
  Protected:
18
  name: Protected
 
 
 
 
19
  roi: ''
20
  gee_path: WCMC/WDPA/current/polygons
21
  gee_type: feature_collection
@@ -32,9 +41,14 @@ Protected:
32
  opacity: 0.8
33
  select: REP_AREA
34
  bandname: constant
 
35
  show: true
36
  Air:
37
  name: Air
 
 
 
 
38
  roi: ''
39
  gee_path: COPERNICUS/S5P/OFFL/L3_AER_AI
40
  gee_type: image_collection
@@ -52,15 +66,20 @@ Air:
52
  bandname: absorbing_aerosol_index
53
  select: absorbing_aerosol_index
54
  dates: false
 
55
  show: false
56
  Soil:
57
  name: Soil
 
 
 
 
58
  roi: ''
59
  gee_path: OpenLandMap/SOL/SOL_ORGANIC-CARBON_USDA-6A1C_M/v02
60
  gee_type: image
61
  viz:
62
  bands:
63
- - b200
64
  min: 0
65
  max: 12
66
  palette:
@@ -77,9 +96,14 @@ Soil:
77
  - 00120b
78
  select: b0
79
  bandname: b0
 
80
  show: false
81
  Temperature:
82
  name: Temperature
 
 
 
 
83
  roi: ''
84
  gee_path: MODIS/061/MYD21C1
85
  gee_type: image_collection
@@ -119,17 +143,24 @@ Temperature:
119
  select: LST_Day
120
  bandname: LST_Day
121
  dates: true
 
122
  show: true
123
  Habitat:
124
  name: Habitat
 
125
  roi: ''
126
  gee_path: projects/sat-io/open-datasets/IUCN_HABITAT/iucn_habitatclassification_composite_lvl2_ver004
127
  gee_type: image
128
  viz: {}
129
  bandname: comp_first
 
130
  show: true
131
  NDVI:
132
  name: NDVI
 
 
 
 
133
  roi: ''
134
  gee_path: LANDSAT/LC08/C02/T1
135
  gee_type: algebraic
@@ -137,6 +168,8 @@ NDVI:
137
  - B4
138
  - B3
139
  viz:
 
 
140
  palette:
141
  - "#d73027"
142
  - "#f46d43"
@@ -147,15 +180,23 @@ NDVI:
147
  - "#66bd63"
148
  - "#1a9850"
149
  bandname: nd
 
 
150
  NDWI:
151
  name: NDWI
 
 
 
 
152
  roi: ''
153
  gee_path: LANDSAT/LC08/C02/T1
154
  gee_type: algebraic
155
  normalized_difference:
156
- - B3
157
  - B5
 
158
  viz:
 
 
159
  palette:
160
  - "#ece7f2"
161
  - "#d0d1e6"
@@ -166,4 +207,5 @@ NDWI:
166
  - "#045a8d"
167
  - "#023858"
168
  bandname: nd
 
169
  show: true
 
1
  ---
2
  Water:
3
  name: Water
4
+ description: The percent of a given area covered with water.
5
+ unit: percentage
6
+ min: 0
7
+ max: 100
8
  roi: ''
9
  gee_path: JRC/GSW1_1/GlobalSurfaceWater
10
  gee_type: image
 
17
  - 0000ff
18
  bandname: occurrence
19
  select: occurrence
20
+ coefficient: 1
21
  show: true
22
  Protected:
23
  name: Protected
24
+ description: The total PA extent, including both marine (if applicable) and terrestrial areas provided by data provider as specified in the legal text for the site.
25
+ unit: km^2
26
+ min: 0
27
+ max: roi_area
28
  roi: ''
29
  gee_path: WCMC/WDPA/current/polygons
30
  gee_type: feature_collection
 
41
  opacity: 0.8
42
  select: REP_AREA
43
  bandname: constant
44
+ coefficient: 1
45
  show: true
46
  Air:
47
  name: Air
48
+ description: A measure of the prevalence of aerosols in the atmosphere.
49
+ unit: index (continuous)
50
+ min: -21
51
+ max: 39
52
  roi: ''
53
  gee_path: COPERNICUS/S5P/OFFL/L3_AER_AI
54
  gee_type: image_collection
 
66
  bandname: absorbing_aerosol_index
67
  select: absorbing_aerosol_index
68
  dates: false
69
+ coefficient: 1
70
  show: false
71
  Soil:
72
  name: Soil
73
+ description: An estimate of soil organic carbon content at b{n} cm depth.
74
+ unit: 5g/kg
75
+ min: 0
76
+ max: 120
77
  roi: ''
78
  gee_path: OpenLandMap/SOL/SOL_ORGANIC-CARBON_USDA-6A1C_M/v02
79
  gee_type: image
80
  viz:
81
  bands:
82
+ - b0
83
  min: 0
84
  max: 12
85
  palette:
 
96
  - 00120b
97
  select: b0
98
  bandname: b0
99
+ coefficient: 1
100
  show: false
101
  Temperature:
102
  name: Temperature
103
+ description: Average Daytime Land Surface Temperature
104
+ unit: 0.02 K
105
+ min: 7500
106
+ max: 65535
107
  roi: ''
108
  gee_path: MODIS/061/MYD21C1
109
  gee_type: image_collection
 
143
  select: LST_Day
144
  bandname: LST_Day
145
  dates: true
146
+ coefficient: 1
147
  show: true
148
  Habitat:
149
  name: Habitat
150
+ unit: Classification index (categorical)
151
  roi: ''
152
  gee_path: projects/sat-io/open-datasets/IUCN_HABITAT/iucn_habitatclassification_composite_lvl2_ver004
153
  gee_type: image
154
  viz: {}
155
  bandname: comp_first
156
+ coefficient: 1
157
  show: true
158
  NDVI:
159
  name: NDVI
160
+ description: Normalized difference vegetation index
161
+ unit: index (continuous)
162
+ min: -1
163
+ max: 1
164
  roi: ''
165
  gee_path: LANDSAT/LC08/C02/T1
166
  gee_type: algebraic
 
168
  - B4
169
  - B3
170
  viz:
171
+ min: -1
172
+ max: 1
173
  palette:
174
  - "#d73027"
175
  - "#f46d43"
 
180
  - "#66bd63"
181
  - "#1a9850"
182
  bandname: nd
183
+ coefficient: 1
184
+ show: true
185
  NDWI:
186
  name: NDWI
187
+ description: An estimate of the water content of leaves.
188
+ unit: index (continuous)
189
+ min: -1
190
+ max: 1
191
  roi: ''
192
  gee_path: LANDSAT/LC08/C02/T1
193
  gee_type: algebraic
194
  normalized_difference:
 
195
  - B5
196
+ - B6
197
  viz:
198
+ min: -1
199
+ max: 1
200
  palette:
201
  - "#ece7f2"
202
  - "#d0d1e6"
 
207
  - "#045a8d"
208
  - "#023858"
209
  bandname: nd
210
+ coefficient: 1
211
  show: true
utils/duckdb_queries.py CHANGED
@@ -46,6 +46,12 @@ def get_project_scores(project_name, start_year, end_year):
46
  ).df()
47
 
48
 
 
 
 
 
 
 
49
  def check_if_project_exists_for_year(project_name, year):
50
  return con.execute(
51
  "SELECT COUNT(1) FROM bioindicator WHERE (year = ? AND project_name = ?)",
@@ -55,7 +61,7 @@ def check_if_project_exists_for_year(project_name, year):
55
 
56
  def write_score_to_temptable(df):
57
  con.sql(
58
- "CREATE OR REPLACE TABLE _temptable AS SELECT *, ROUND((value * area), 2) AS score FROM (SELECT year, project_name, ROUND(AVG(value), 2) AS value, area FROM df GROUP BY year, project_name, area ORDER BY project_name)"
59
  )
60
  return True
61
 
@@ -64,7 +70,7 @@ def get_or_create_bioindicator_table():
64
  con.sql(
65
  """
66
  USE climatebase;
67
- CREATE TABLE IF NOT EXISTS bioindicator (year BIGINT, project_name VARCHAR(255), value DOUBLE, area DOUBLE, score DOUBLE, CONSTRAINT unique_year_project_name UNIQUE (year, project_name));
68
  """
69
  )
70
  return True
@@ -74,7 +80,7 @@ def upsert_project_record():
74
  con.sql(
75
  """
76
  INSERT INTO bioindicator FROM _temptable
77
- ON CONFLICT (year, project_name) DO UPDATE SET value = excluded.value;
78
  """
79
  )
80
  return True
 
46
  ).df()
47
 
48
 
49
def check_if_table_exists(table_name):
    """Return whether `table_name` is among the tables listed by SHOW TABLES.

    Args:
        table_name: Exact name of the table to look for.

    Returns:
        bool: True when a table with that name is listed, False otherwise.
    """
    # Each row returned by SHOW TABLES carries the table name in its first column.
    existing_tables = [row[0] for row in con.execute("SHOW TABLES;").fetchall()]
    return table_name in existing_tables
54
+
55
  def check_if_project_exists_for_year(project_name, year):
56
  return con.execute(
57
  "SELECT COUNT(1) FROM bioindicator WHERE (year = ? AND project_name = ?)",
 
61
 
62
  def write_score_to_temptable(df):
63
  con.sql(
64
+ "CREATE OR REPLACE TABLE _temptable AS SELECT *, (value * area) AS score FROM (SELECT year, project_name, metric, AVG(value * coefficient) AS value, area FROM df GROUP BY year, project_name, metric, area ORDER BY project_name, metric)"
65
  )
66
  return True
67
 
 
70
  con.sql(
71
  """
72
  USE climatebase;
73
+ CREATE TABLE IF NOT EXISTS bioindicator (year BIGINT, project_name VARCHAR(255), metric VARCHAR(255), value DOUBLE, area DOUBLE, score DOUBLE, CONSTRAINT unique_year_project_name_metric UNIQUE (year, project_name, metric));
74
  """
75
  )
76
  return True
 
80
  con.sql(
81
  """
82
  INSERT INTO bioindicator FROM _temptable
83
+ ON CONFLICT (year, project_name, metric) DO UPDATE SET value = excluded.value;
84
  """
85
  )
86
  return True
utils/indicators.py CHANGED
@@ -16,8 +16,6 @@ from . import logging
16
  GEE_SERVICE_ACCOUNT = (
17
  "climatebase-july-2023@ee-geospatialml-aquarry.iam.gserviceaccount.com"
18
  )
19
- INDICES_FILE = "indices.yaml"
20
-
21
 
22
  class IndexGenerator:
23
  """
@@ -27,20 +25,22 @@ class IndexGenerator:
27
  indices (string[], required): Array of index names to include in aggregate index generation.
28
  """
29
 
30
- def __init__(
31
- self,
32
- indices,
33
- ):
34
  # Authenticate to GEE & DuckDB
35
  self._authenticate_ee(GEE_SERVICE_ACCOUNT)
36
 
 
37
  self.project_name = None
38
  self.project_geometry = None
39
  self.project_centroid = None
40
-
 
 
 
41
  # Use defined subset of indices
42
- all_indices = self._load_indices(INDICES_FILE)
43
- self.indices = {k: all_indices[k] for k in indices}
 
44
 
45
  def set_project(self, project_name):
46
  self.project_name = project_name
@@ -139,6 +139,18 @@ class IndexGenerator:
139
  if not dataset:
140
  raise Exception("Failed to generate dataset.")
141
 
 
 
 
 
 
 
 
 
 
 
 
 
142
  logging.info(f"Generated index: {index_config['name']}")
143
  return dataset
144
 
@@ -163,17 +175,17 @@ class IndexGenerator:
163
  logging.info(f"Calculated zonal mean for {index_key}.")
164
  return out
165
 
166
- def generate_composite_index_df(self, year, indices=[]):
167
  data = {
168
- "metric": indices,
169
  "year": year,
170
  "centroid": "",
171
  "project_name": "",
172
- "value": list(map(self.zonal_mean_index, indices, repeat(year))),
173
  # to-do: calculate with duckdb; also, should be part of project table instead
174
  "area": self.roi.area().getInfo(), # m^2
175
  "geojson": "",
176
- # to-do: coefficient
177
  }
178
 
179
  logging.info("data", data)
@@ -199,9 +211,7 @@ class IndexGenerator:
199
  # to-do: pararelize?
200
  for year in years:
201
  logging.info(year)
202
- df = self.generate_composite_index_df(
203
- year, self.project_geometry, list(self.indices.keys())
204
- )
205
  dfs.append(df)
206
 
207
  # Concatenate all dataframes
@@ -299,7 +309,9 @@ class IndexGenerator:
299
 
300
  def calculate_score(self, start_year, end_year):
301
  years = []
302
- for year in range(start_year, end_year):
 
 
303
  row_exists = dq.check_if_project_exists_for_year(self.project_name, year)
304
  if not row_exists:
305
  years.append(year)
@@ -310,11 +322,20 @@ class IndexGenerator:
310
  # Write score table to `_temptable`
311
  dq.write_score_to_temptable(df)
312
 
313
- # Create `bioindicator` table IF NOT EXISTS.
314
- dq.get_or_create_bioindicator_table()
315
-
316
  # UPSERT project record
317
  dq.upsert_project_record()
318
  logging.info("upserted records into motherduck")
319
  scores = dq.get_project_scores(self.project_name, start_year, end_year)
 
 
 
 
 
 
 
 
 
 
 
 
320
  return scores
 
16
  GEE_SERVICE_ACCOUNT = (
17
  "climatebase-july-2023@ee-geospatialml-aquarry.iam.gserviceaccount.com"
18
  )
 
 
19
 
20
  class IndexGenerator:
21
  """
 
25
  indices (string[], required): Array of index names to include in aggregate index generation.
26
  """
27
 
28
def __init__(self):
    """Authenticate to GEE and start with no project or metric selected.

    Every selection attribute starts as None. `set_metric` later fills in
    `indices` and `metric_name`; `set_project` sets `project_name`.
    """
    # Authentication happens first, before any state is initialised.
    self._authenticate_ee(GEE_SERVICE_ACCOUNT)

    # Project-related state
    self.roi = None
    self.project_name = None
    self.project_geometry = None
    self.project_centroid = None

    # Metric-related state
    self.indices = None
    self.metric_name = None
38
+
39
def set_metric(self, metric_name):
    """Make `metric_name` the active metric and load its index definitions.

    Args:
        metric_name (string, required): Display name of the metric. Spaces
            are replaced with underscores to locate `metrics/<name>.yaml`.
    """
    # Resolve the YAML file holding the index definitions for this metric
    config_path = f'metrics/{metric_name.replace(" ", "_")}.yaml'
    loaded_indices = self._load_indices(config_path)
    self.indices = loaded_indices
    # Only record the name once the definitions were loaded successfully
    self.metric_name = metric_name
44
 
45
  def set_project(self, project_name):
46
  self.project_name = project_name
 
139
  if not dataset:
140
  raise Exception("Failed to generate dataset.")
141
 
142
# Normalize to a range of [0, 1] using the `min`/`max` declared for this
# index (defaults: min=0, max=1 when a bound is missing or not numeric).
min_val = 0
max_val = 1
configured_min = index_config.get('min')
configured_max = index_config.get('max')
# isinstance() replaces the original `type(x == float)` check, whose
# misplaced parenthesis made the condition always true. bool is excluded
# explicitly because it is a subclass of int.
if isinstance(configured_min, (int, float)) and not isinstance(configured_min, bool):
    min_val = configured_min
if str(configured_max) == 'roi_area':
    # NOTE(review): the ROI area is in m^2 while the index that uses
    # `roi_area` declares its unit as km^2 -- confirm the intended scale.
    max_val = self.roi.area().getInfo()  # in m^2
elif isinstance(configured_max, (int, float)) and not isinstance(configured_max, bool):
    max_val = configured_max
# subtract()/divide() return a new object instead of modifying `dataset`
# in place (presumably an Earth Engine object), so the result must be
# re-bound or the normalization is silently discarded.
# An empty range is skipped to avoid dividing by zero.
if max_val != min_val:
    dataset = dataset.subtract(min_val).divide(max_val - min_val)
153
+
154
  logging.info(f"Generated index: {index_config['name']}")
155
  return dataset
156
 
 
175
  logging.info(f"Calculated zonal mean for {index_key}.")
176
  return out
177
 
178
+ def generate_composite_index_df(self, year):
179
  data = {
180
+ "metric": self.metric_name,
181
  "year": year,
182
  "centroid": "",
183
  "project_name": "",
184
+ "value": list(map(self.zonal_mean_index, self.indices, repeat(year))),
185
  # to-do: calculate with duckdb; also, should be part of project table instead
186
  "area": self.roi.area().getInfo(), # m^2
187
  "geojson": "",
188
+ "coefficient": list(map(lambda x: self.indices[x]['coefficient'], self.indices))
189
  }
190
 
191
  logging.info("data", data)
 
211
  # to-do: pararelize?
212
  for year in years:
213
  logging.info(year)
214
+ df = self.generate_composite_index_df(year)
 
 
215
  dfs.append(df)
216
 
217
  # Concatenate all dataframes
 
309
 
310
  def calculate_score(self, start_year, end_year):
311
  years = []
312
+ # Create `bioindicator` table IF NOT EXISTS.
313
+ dq.get_or_create_bioindicator_table()
314
+ for year in range(start_year, end_year+1):
315
  row_exists = dq.check_if_project_exists_for_year(self.project_name, year)
316
  if not row_exists:
317
  years.append(year)
 
322
  # Write score table to `_temptable`
323
  dq.write_score_to_temptable(df)
324
 
 
 
 
325
  # UPSERT project record
326
  dq.upsert_project_record()
327
  logging.info("upserted records into motherduck")
328
  scores = dq.get_project_scores(self.project_name, start_year, end_year)
329
+ scores.columns = scores.columns.str.replace('_', ' ').str.title()
330
+ if 'Area' in scores.columns:
331
+ scores['Area'] /= 1000**2
332
+ scores.rename(columns={'Area':'Area (km^2)'}, inplace=True)
333
+ if 'Score' in scores.columns:
334
+ scores['Score'] /= 1000**2
335
+ scores.rename(columns={'Score': 'Score (Area * Value)'}, inplace=True)
336
+ # Round scores to 4 significant figures
337
+ scores = scores.apply(
338
+ lambda x: ['%.4g'%x_i for x_i in x]
339
+ if pd.api.types.is_numeric_dtype(x)
340
+ else x)
341
  return scores