Spaces:

openbio
/

calculator

Build error

App Files Files Community

mbuuck commited on Jul 25, 2023

Commit

12fd562

1 Parent(s): fae8684

Added coefficients/normalizations. Added rounding.

Browse files

Files changed (5) hide show

app.py +14 -3
metrics/OE_Biodiversity.yaml +109 -0
indices.yaml → unused_metrics/OE_Biodiversity_20230723.yaml +44 -2
utils/duckdb_queries.py +9 -3
utils/indicators.py +41 -20

app.py CHANGED Viewed

@@ -1,3 +1,4 @@
 import gradio as gr
 from utils import duckdb_queries as dq
@@ -5,13 +6,18 @@ from utils.gradio import get_window_url_params
 from utils.indicators import IndexGenerator
 # Instantiate outside gradio app to avoid re-initializing GEE, which is slow
-indexgenerator = IndexGenerator(indices=["NDWI", "Water", "Protected", "Habitat"])
 with gr.Blocks() as demo:
     with gr.Column():
         m1 = gr.Plot()
         with gr.Row():
             project_name = gr.Dropdown([], label="Project", value="Select project")
             start_year = gr.Number(value=2017, label="Start Year", precision=0)
             end_year = gr.Number(value=2022, label="End Year", precision=0)
         with gr.Row():
@@ -37,8 +43,7 @@ with gr.Blocks() as demo:
         projects = dq.list_projects_by_author(author_id=username)
         # Initialize the first project in the list
         project_names = projects['name'].tolist()
-        default_project = project_names[0]
-        return gr.Dropdown.update(choices=project_names, value=default_project)
     # Change the project name in the index generator object when the
     # user selects a new project
@@ -47,6 +52,12 @@ with gr.Blocks() as demo:
         inputs=project_name
     )
     # Get url params
     url_params = gr.JSON({"username": "default"}, visible=False, label="URL Params")

+import os
 import gradio as gr
 from utils import duckdb_queries as dq
 from utils.indicators import IndexGenerator
 # Instantiate outside gradio app to avoid re-initializing GEE, which is slow
+indexgenerator = IndexGenerator()
+metric_names = os.listdir('metrics')
+for i in range(len(metric_names)):
+    metric_names[i] = metric_names[i].split('.yaml')[0].replace('_', ' ')
 with gr.Blocks() as demo:
     with gr.Column():
         m1 = gr.Plot()
         with gr.Row():
             project_name = gr.Dropdown([], label="Project", value="Select project")
+            metric = gr.Dropdown(metric_names, label='Metric', value='Select metric')
             start_year = gr.Number(value=2017, label="Start Year", precision=0)
             end_year = gr.Number(value=2022, label="End Year", precision=0)
         with gr.Row():
         projects = dq.list_projects_by_author(author_id=username)
         # Initialize the first project in the list
         project_names = projects['name'].tolist()
+        return gr.Dropdown.update(choices=project_names)
     # Change the project name in the index generator object when the
     # user selects a new project
         inputs=project_name
     )
+    # Set the metric to be calculated
+    metric.change(
+        indexgenerator.set_metric,
+        inputs=metric
+    )
     # Get url params
     url_params = gr.JSON({"username": "default"}, visible=False, label="URL Params")

metrics/OE_Biodiversity.yaml ADDED Viewed

	@@ -0,0 +1,109 @@

+---
+Protected:
+  name: Protected
+  description: The total PA extent, including both marine (if applicable) and terrestrial areas provided by data provider as specified in the legal text for the site.
+  unit: km^2
+  min: 0
+  max: roi_area
+  roi: ''
+  gee_path: WCMC/WDPA/current/polygons
+  gee_type: feature_collection
+  viz:
+    palette:
+    - 2ed033
+    - 5aff05
+    - 67b9ff
+    - 5844ff
+    - 0a7618
+    - 2c05ff
+    min: 0
+    max: 1550000
+    opacity: 0.8
+  select: REP_AREA
+  bandname: constant
+  coefficient: 1
+  show: true
+Soil:
+  name: Soil
+  description: An estimate of soil organic carbon content at b{n} cm depth.
+  unit: 5g/kg
+  min: 0
+  max: 120
+  roi: ''
+  gee_path: OpenLandMap/SOL/SOL_ORGANIC-CARBON_USDA-6A1C_M/v02
+  gee_type: image
+  viz:
+    bands:
+    - b0
+    min: 0
+    max: 12
+    palette:
+    - ffffa0
+    - f7fcb9
+    - d9f0a3
+    - addd8e
+    - 78c679
+    - 41ab5d
+    - '238443'
+    - 005b29
+    - 004b29
+    - 012b13
+    - 00120b
+  select: b0
+  bandname: b0
+  coefficient: 1
+  show: false
+NDVI:
+  name: NDVI
+  description: Normalized difference vegetation index
+  unit: index (continuous)
+  min: -1
+  max: 1
+  roi: ''
+  gee_path: LANDSAT/LC08/C02/T1
+  gee_type: algebraic
+  normalized_difference:
+  - B4
+  - B3
+  viz:
+    min: -1
+    max: 1
+    palette:
+    - "#d73027"
+    - "#f46d43"
+    - "#fdae61"
+    - "#fee08b"
+    - "#d9ef8b"
+    - "#a6d96a"
+    - "#66bd63"
+    - "#1a9850"
+  bandname: nd
+  coefficient: 1
+  show: true
+NDWI:
+  name: NDWI
+  description: An estimate of the water content of leaves.
+  unit: index (continuous)
+  min: -1
+  max: 1
+  roi: ''
+  gee_path: LANDSAT/LC08/C02/T1
+  gee_type: algebraic
+  normalized_difference:
+  - B5
+  - B6
+  viz:
+    min: -1
+    max: 1
+    palette:
+    - "#ece7f2"
+    - "#d0d1e6"
+    - "#a6bddb"
+    - "#74a9cf"
+    - "#3690c0"
+    - "#0570b0"
+    - "#045a8d"
+    - "#023858"
+  bandname: nd
+  coefficient: 1
+  show: true

indices.yaml → unused_metrics/OE_Biodiversity_20230723.yaml RENAMED Viewed

@@ -1,6 +1,10 @@
 ---
 Water:
   name: Water
   roi: ''
   gee_path: JRC/GSW1_1/GlobalSurfaceWater
   gee_type: image
@@ -13,9 +17,14 @@ Water:
     - 0000ff
   bandname: occurrence
   select: occurrence
   show: true
 Protected:
   name: Protected
   roi: ''
   gee_path: WCMC/WDPA/current/polygons
   gee_type: feature_collection
@@ -32,9 +41,14 @@ Protected:
     opacity: 0.8
   select: REP_AREA
   bandname: constant
   show: true
 Air:
   name: Air
   roi: ''
   gee_path: COPERNICUS/S5P/OFFL/L3_AER_AI
   gee_type: image_collection
@@ -52,15 +66,20 @@ Air:
   bandname: absorbing_aerosol_index
   select: absorbing_aerosol_index
   dates: false
   show: false
 Soil:
   name: Soil
   roi: ''
   gee_path: OpenLandMap/SOL/SOL_ORGANIC-CARBON_USDA-6A1C_M/v02
   gee_type: image
   viz:
     bands:
-    - b200
     min: 0
     max: 12
     palette:
@@ -77,9 +96,14 @@ Soil:
     - 00120b
   select: b0
   bandname: b0
   show: false
 Temperature:
   name: Temperature
   roi: ''
   gee_path: MODIS/061/MYD21C1
   gee_type: image_collection
@@ -119,17 +143,24 @@ Temperature:
   select: LST_Day
   bandname: LST_Day
   dates: true
   show: true
 Habitat:
   name: Habitat
   roi: ''
   gee_path: projects/sat-io/open-datasets/IUCN_HABITAT/iucn_habitatclassification_composite_lvl2_ver004
   gee_type: image
   viz: {}
   bandname: comp_first
   show: true
 NDVI:
   name: NDVI
   roi: ''
   gee_path: LANDSAT/LC08/C02/T1
   gee_type: algebraic
@@ -137,6 +168,8 @@ NDVI:
   - B4
   - B3
   viz:
     palette:
     - "#d73027"
     - "#f46d43"
@@ -147,15 +180,23 @@ NDVI:
     - "#66bd63"
     - "#1a9850"
   bandname: nd
 NDWI:
   name: NDWI
   roi: ''
   gee_path: LANDSAT/LC08/C02/T1
   gee_type: algebraic
   normalized_difference:
-  - B3
   - B5
   viz:
     palette:
     - "#ece7f2"
     - "#d0d1e6"
@@ -166,4 +207,5 @@ NDWI:
     - "#045a8d"
     - "#023858"
   bandname: nd
   show: true

 ---
 Water:
   name: Water
+  description: The percent of a given area covered with water.
+  unit: percentage
+  min: 0
+  max: 100
   roi: ''
   gee_path: JRC/GSW1_1/GlobalSurfaceWater
   gee_type: image
     - 0000ff
   bandname: occurrence
   select: occurrence
+  coefficient: 1
   show: true
 Protected:
   name: Protected
+  description: The total PA extent, including both marine (if applicable) and terrestrial areas provided by data provider as specified in the legal text for the site.
+  unit: km^2
+  min: 0
+  max: roi_area
   roi: ''
   gee_path: WCMC/WDPA/current/polygons
   gee_type: feature_collection
     opacity: 0.8
   select: REP_AREA
   bandname: constant
+  coefficient: 1
   show: true
 Air:
   name: Air
+  description: A measure of the prevalence of aerosols in the atmosphere.
+  unit: index (continuous)
+  min: -21
+  max: 39
   roi: ''
   gee_path: COPERNICUS/S5P/OFFL/L3_AER_AI
   gee_type: image_collection
   bandname: absorbing_aerosol_index
   select: absorbing_aerosol_index
   dates: false
+  coefficient: 1
   show: false
 Soil:
   name: Soil
+  description: An estimate of soil organic carbon content at b{n} cm depth.
+  unit: 5g/kg
+  min: 0
+  max: 120
   roi: ''
   gee_path: OpenLandMap/SOL/SOL_ORGANIC-CARBON_USDA-6A1C_M/v02
   gee_type: image
   viz:
     bands:
+    - b0
     min: 0
     max: 12
     palette:
     - 00120b
   select: b0
   bandname: b0
+  coefficient: 1
   show: false
 Temperature:
   name: Temperature
+  description: Average Daytime Land Surface Temperature
+  unit: 0.02 K
+  min: 7500
+  max: 65535
   roi: ''
   gee_path: MODIS/061/MYD21C1
   gee_type: image_collection
   select: LST_Day
   bandname: LST_Day
   dates: true
+  coefficient: 1
   show: true
 Habitat:
   name: Habitat
+  unit: Classification index (categorical)
   roi: ''
   gee_path: projects/sat-io/open-datasets/IUCN_HABITAT/iucn_habitatclassification_composite_lvl2_ver004
   gee_type: image
   viz: {}
   bandname: comp_first
+  coefficient: 1
   show: true
 NDVI:
   name: NDVI
+  description: Normalized difference vegetation index
+  unit: index (continuous)
+  min: -1
+  max: 1
   roi: ''
   gee_path: LANDSAT/LC08/C02/T1
   gee_type: algebraic
   - B4
   - B3
   viz:
+    min: -1
+    max: 1
     palette:
     - "#d73027"
     - "#f46d43"
     - "#66bd63"
     - "#1a9850"
   bandname: nd
+  coefficient: 1
+  show: true
 NDWI:
   name: NDWI
+  description: An estimate of the water content of leaves.
+  unit: index (continuous)
+  min: -1
+  max: 1
   roi: ''
   gee_path: LANDSAT/LC08/C02/T1
   gee_type: algebraic
   normalized_difference:
   - B5
+  - B6
   viz:
+    min: -1
+    max: 1
     palette:
     - "#ece7f2"
     - "#d0d1e6"
     - "#045a8d"
     - "#023858"
   bandname: nd
+  coefficient: 1
   show: true

utils/duckdb_queries.py CHANGED Viewed

@@ -46,6 +46,12 @@ def get_project_scores(project_name, start_year, end_year):
     ).df()
 def check_if_project_exists_for_year(project_name, year):
     return con.execute(
         "SELECT COUNT(1) FROM bioindicator WHERE (year = ? AND project_name = ?)",
@@ -55,7 +61,7 @@ def check_if_project_exists_for_year(project_name, year):
 def write_score_to_temptable(df):
     con.sql(
-        "CREATE OR REPLACE TABLE _temptable AS SELECT *, ROUND((value * area), 2) AS score FROM (SELECT year, project_name, ROUND(AVG(value), 2) AS value, area FROM df GROUP BY year, project_name, area ORDER BY project_name)"
     )
     return True
@@ -64,7 +70,7 @@ def get_or_create_bioindicator_table():
     con.sql(
         """
             USE climatebase;
-            CREATE TABLE IF NOT EXISTS bioindicator (year BIGINT, project_name VARCHAR(255), value DOUBLE, area DOUBLE, score DOUBLE, CONSTRAINT unique_year_project_name UNIQUE (year, project_name));
             """
     )
     return True
@@ -74,7 +80,7 @@ def upsert_project_record():
     con.sql(
         """
                 INSERT INTO bioindicator FROM _temptable
-                ON CONFLICT (year, project_name) DO UPDATE SET value = excluded.value;
             """
     )
     return True

     ).df()
+def check_if_table_exists(table_name):
+    tables = con.execute("SHOW TABLES;").fetchall()
+    for i in range(len(tables)):
+        tables[i] = tables[i][0]
+    return table_name in tables
 def check_if_project_exists_for_year(project_name, year):
     return con.execute(
         "SELECT COUNT(1) FROM bioindicator WHERE (year = ? AND project_name = ?)",
 def write_score_to_temptable(df):
     con.sql(
+        "CREATE OR REPLACE TABLE _temptable AS SELECT *, (value * area) AS score FROM (SELECT year, project_name, metric, AVG(value * coefficient) AS value, area FROM df GROUP BY year, project_name, metric, area ORDER BY project_name, metric)"
     )
     return True
     con.sql(
         """
             USE climatebase;
+            CREATE TABLE IF NOT EXISTS bioindicator (year BIGINT, project_name VARCHAR(255), metric VARCHAR(255), value DOUBLE, area DOUBLE, score DOUBLE, CONSTRAINT unique_year_project_name_metric UNIQUE (year, project_name, metric));
             """
     )
     return True
     con.sql(
         """
                 INSERT INTO bioindicator FROM _temptable
+                ON CONFLICT (year, project_name, metric) DO UPDATE SET value = excluded.value;
             """
     )
     return True

utils/indicators.py CHANGED Viewed

@@ -16,8 +16,6 @@ from . import logging
 GEE_SERVICE_ACCOUNT = (
     "climatebase-july-2023@ee-geospatialml-aquarry.iam.gserviceaccount.com"
 )
-INDICES_FILE = "indices.yaml"
 class IndexGenerator:
     """
@@ -27,20 +25,22 @@ class IndexGenerator:
             indices (string[], required): Array of index names to include in aggregate index generation.
     """
-    def __init__(
-        self,
-        indices,
-    ):
         # Authenticate to GEE & DuckDB
         self._authenticate_ee(GEE_SERVICE_ACCOUNT)
         self.project_name = None
         self.project_geometry = None
         self.project_centroid = None
         # Use defined subset of indices
-        all_indices = self._load_indices(INDICES_FILE)
-        self.indices = {k: all_indices[k] for k in indices}
     def set_project(self, project_name):
         self.project_name = project_name
@@ -139,6 +139,18 @@ class IndexGenerator:
         if not dataset:
             raise Exception("Failed to generate dataset.")
         logging.info(f"Generated index: {index_config['name']}")
         return dataset
@@ -163,17 +175,17 @@ class IndexGenerator:
         logging.info(f"Calculated zonal mean for {index_key}.")
         return out
-    def generate_composite_index_df(self, year, indices=[]):
         data = {
-            "metric": indices,
             "year": year,
             "centroid": "",
             "project_name": "",
-            "value": list(map(self.zonal_mean_index, indices, repeat(year))),
             # to-do: calculate with duckdb; also, should be part of project table instead
             "area": self.roi.area().getInfo(),  # m^2
             "geojson": "",
-            # to-do: coefficient
         }
         logging.info("data", data)
@@ -199,9 +211,7 @@ class IndexGenerator:
         # to-do: pararelize?
         for year in years:
             logging.info(year)
-            df = self.generate_composite_index_df(
-                year, self.project_geometry, list(self.indices.keys())
-            )
             dfs.append(df)
         # Concatenate all dataframes
@@ -299,7 +309,9 @@ class IndexGenerator:
     def calculate_score(self, start_year, end_year):
         years = []
-        for year in range(start_year, end_year):
             row_exists = dq.check_if_project_exists_for_year(self.project_name, year)
             if not row_exists:
                 years.append(year)
@@ -310,11 +322,20 @@ class IndexGenerator:
             # Write score table to `_temptable`
             dq.write_score_to_temptable(df)
-            # Create `bioindicator` table IF NOT EXISTS.
-            dq.get_or_create_bioindicator_table()
             # UPSERT project record
             dq.upsert_project_record()
             logging.info("upserted records into motherduck")
         scores = dq.get_project_scores(self.project_name, start_year, end_year)
         return scores

 GEE_SERVICE_ACCOUNT = (
     "climatebase-july-2023@ee-geospatialml-aquarry.iam.gserviceaccount.com"
 )
 class IndexGenerator:
     """
             indices (string[], required): Array of index names to include in aggregate index generation.
     """
+    def __init__(self):
         # Authenticate to GEE & DuckDB
         self._authenticate_ee(GEE_SERVICE_ACCOUNT)
+        self.roi = None
         self.project_name = None
         self.project_geometry = None
         self.project_centroid = None
+        self.indices = None
+        self.metric_name = None
+    def set_metric(self, metric_name):
         # Use defined subset of indices
+        indices_file = f'metrics/{metric_name.replace(" ", "_")}.yaml'
+        self.indices = self._load_indices(indices_file)
+        self.metric_name = metric_name
     def set_project(self, project_name):
         self.project_name = project_name
         if not dataset:
             raise Exception("Failed to generate dataset.")
+        # Normalize to a range of [0, 1]
+        min_val = 0
+        max_val = 1
+        if type(index_config['min'])==int or type(index_config['min']==float):
+            min_val = index_config['min']
+        if str(index_config['max'])=='roi_area':
+            max_val = self.roi.area().getInfo() # in m^2
+        elif type(index_config['max'])==int or type(index_config['max']==float):
+            max_val = index_config['max']
+        dataset.subtract(min_val)\
+            .divide(max_val - min_val)
         logging.info(f"Generated index: {index_config['name']}")
         return dataset
         logging.info(f"Calculated zonal mean for {index_key}.")
         return out
+    def generate_composite_index_df(self, year):
         data = {
+            "metric": self.metric_name,
             "year": year,
             "centroid": "",
             "project_name": "",
+            "value": list(map(self.zonal_mean_index, self.indices, repeat(year))),
             # to-do: calculate with duckdb; also, should be part of project table instead
             "area": self.roi.area().getInfo(),  # m^2
             "geojson": "",
+            "coefficient": list(map(lambda x: self.indices[x]['coefficient'], self.indices))
         }
         logging.info("data", data)
         # to-do: pararelize?
         for year in years:
             logging.info(year)
+            df = self.generate_composite_index_df(year)
             dfs.append(df)
         # Concatenate all dataframes
     def calculate_score(self, start_year, end_year):
         years = []
+        # Create `bioindicator` table IF NOT EXISTS.
+        dq.get_or_create_bioindicator_table()
+        for year in range(start_year, end_year+1):
             row_exists = dq.check_if_project_exists_for_year(self.project_name, year)
             if not row_exists:
                 years.append(year)
             # Write score table to `_temptable`
             dq.write_score_to_temptable(df)
             # UPSERT project record
             dq.upsert_project_record()
             logging.info("upserted records into motherduck")
         scores = dq.get_project_scores(self.project_name, start_year, end_year)
+        scores.columns = scores.columns.str.replace('_', ' ').str.title()
+        if 'Area' in scores.columns:
+            scores['Area'] /= 1000**2
+            scores.rename(columns={'Area':'Area (km^2)'}, inplace=True)
+        if 'Score' in scores.columns:
+            scores['Score'] /= 1000**2
+            scores.rename(columns={'Score': 'Score (Area * Value)'}, inplace=True)
+        # Round scores to 4 significant figures
+        scores = scores.apply(
+            lambda x: ['%.4g'%x_i for x_i in x]
+                if pd.api.types.is_numeric_dtype(x)
+                else x)
         return scores