Spaces:

hackathon-ombrea
/

gaia

Runtime error

App Files Files Community

Hugo Massonnat committed on Feb 25

Commit

ac675c8

1 Parent(s): ec9d9e0

update forecast dataframe

Browse files

Files changed (2) hide show

forecast.py +59 -78
requirements.txt +2 -0

forecast.py CHANGED Viewed

@@ -1,11 +1,8 @@
 import os
 import xarray as xr
 import pandas as pd
-from matplotlib import pyplot as plt
-import docs.agro_indicators as agro_indicators
-import numpy as np
-from datetime import datetime
 # Mapping of variable names to metadata (title, unit, and NetCDF variable key)
 VARIABLE_MAPPING = {
@@ -23,21 +20,21 @@ VARIABLE_MAPPING = {
 # Function to load data for a given variable from the dataset at the nearest latitude and longitude
-def load_data(variable: str, ds: xr.Dataset, lat: float, lon: float) -> xr.DataArray:
     """
     Load data for a given variable from the dataset at the nearest latitude and longitude.
     Args:
         variable (str): The variable to extract from the dataset.
         ds (xr.Dataset): The xarray dataset containing climate data.
-        lat (float): Latitude for nearest data point.
-        lon (float): Longitude for nearest data point.
     Returns:
         xr.DataArray: The data array containing the variable values for the specified location.
     """
     try:
-        data = ds[variable].sel(lat=lat, lon=lon, method="nearest")
         # Convert temperature from Kelvin to Celsius for specific variables
         if variable in ["tas", "tasmin", "tasmax"]:
             data = data - 273.15
@@ -74,109 +71,93 @@ def get_forecast_datasets(climate_sub_files: list) -> dict:
 # Function to extract climate data from forecast datasets and convert to a DataFrame
def get_forecast_data(datasets: dict, lat: float, lon: float) -> pd.DataFrame:
    """
    Build a DataFrame of forecast time series for one location.

    Every entry of VARIABLE_MAPPING is loaded from its matching dataset through
    load_data; variables for which load_data returns None are left out.
    Args:
        datasets (dict): Datasets keyed by the long variable names used in VARIABLE_MAPPING.
        lat (float): Latitude of the location to extract data for.
        lon (float): Longitude of the location to extract data for.
    Returns:
        pd.DataFrame: A 'time' column plus one "<title> (<unit>)" column per loaded variable.
    """
    columns = {'time': []}
    for long_name, (title, unit, variable) in VARIABLE_MAPPING.items():
        print(f"Processing {long_name} ({title}, {unit}, {variable})...")
        series = load_data(variable, datasets[long_name], lat, lon)
        if series is None:
            continue
        # The time axis is overwritten by each loaded variable; the last one wins.
        columns['time'] = series.time.values
        columns[f"{title} ({unit})"] = series.values
    return pd.DataFrame(columns)
-# Function to compute reference evapotranspiration (ET0)
def compute_et0(df: pd.DataFrame, latitude: float, longitude: float):
    """
    Compute reference evapotranspiration (ET0) by delegating to agro_indicators.et0.

    Args:
        df (pd.DataFrame): Climate data exposing the attributes irradiance,
            air_temperature_min, air_temperature_max, relative_humidity_min,
            relative_humidity_max, wind_speed and day_of_year.
        latitude (float): Latitude of the location.
        longitude (float): Longitude of the location.
    Returns:
        arraylike: Whatever agro_indicators.et0 returns for these inputs.
    """
    irradiance = df.irradiance
    t_min = df.air_temperature_min
    t_max = df.air_temperature_max
    # The mean temperature is approximated as the midpoint of the daily extremes.
    t_mean = (t_min + t_max) / 2
    return agro_indicators.et0(
        irradiance,
        t_mean,
        t_max,
        t_min,
        df.relative_humidity_min,
        df.relative_humidity_max,
        df.wind_speed,
        df.day_of_year,
        latitude,
        longitude,
    )
-# Main processing workflow
def main():
    """
    Load the pessimist-scenario forecast for an example location and derive the water deficit.

    Returns:
        pd.DataFrame: The forecast data with the ET0 input aliases, evaporation,
        daily precipitation and water deficit columns added.
    """
    # Collect every .nc file found in the immediate sub-folders of the data directory
    folder_to_parse = "../climate_data_pessimist/"
    climate_sub_folder = []
    for entry in os.listdir(folder_to_parse):
        candidate = os.path.join(folder_to_parse, entry)
        if os.path.isdir(candidate):
            climate_sub_folder.append(candidate)
    climate_sub_files = []
    for folder in climate_sub_folder:
        for file_name in os.listdir(folder):
            if file_name.endswith('.nc'):
                climate_sub_files.append(os.path.join(folder, file_name))

    datasets = get_forecast_datasets(climate_sub_files)

    lat, lon = 47.0, 5.0  # Example coordinates
    final_df = get_forecast_data(datasets, lat, lon)
    coef = 1
    print(final_df.head())

    # Work on a copy and add the short column names read by compute_et0
    data_test = final_df.copy()
    data_test["irradiance"] = data_test['Surface Downwelling Shortwave Radiation (W/m²)'] * coef
    source_by_alias = {
        "air_temperature_min": 'Daily Minimum Near Surface Air Temperature (°C)',
        "air_temperature_max": 'Daily Maximum Near Surface Air Temperature (°C)',
        # A single humidity series feeds both the min and the max input
        "relative_humidity_min": 'Relative Humidity (%)',
        "relative_humidity_max": 'Relative Humidity (%)',
        "wind_speed": 'Near Surface Wind Speed (m/s)',
    }
    for alias, source in source_by_alias.items():
        data_test[alias] = data_test[source]

    # Unparseable timestamps become NaT instead of raising
    data_test['time'] = pd.to_datetime(data_test['time'], errors='coerce')
    data_test['day_of_year'] = data_test['time'].dt.dayofyear

    data_test['Evaporation (mm/day)'] = compute_et0(data_test, lat, lon)

    # Convert precipitation from kg/m²/s to mm/day (86400 seconds per day)
    data_test['Precipitation (mm/day)'] = 86400 * data_test['Precipitation (kg m-2 s-1)']

    # Water Deficit = ET0 - P + M
    evaporation = data_test['Evaporation (mm/day)']
    precipitation = data_test['Precipitation (mm/day)']
    soil_moisture = data_test['Moisture in Upper Portion of Soil Column (kg m-2)']
    data_test['Water Deficit (mm/day)'] = evaporation - precipitation + soil_moisture

    summary_columns = [
        'Water Deficit (mm/day)',
        'Precipitation (mm/day)',
        'Evaporation (mm/day)',
        'Moisture in Upper Portion of Soil Column (kg m-2)',
    ]
    print(data_test[summary_columns])
    return data_test
 # Run the main function

 import os
 import xarray as xr
 import pandas as pd
+from compute_et0_adjusted import compute_et0
 # Mapping of variable names to metadata (title, unit, and NetCDF variable key)
 VARIABLE_MAPPING = {
 # Function to load data for a given variable from the dataset at the nearest latitude and longitude
+def load_data(variable: str, ds: xr.Dataset, latitude: float, longitude: float) -> xr.DataArray:
     """
     Load data for a given variable from the dataset at the nearest latitude and longitude.
     Args:
         variable (str): The variable to extract from the dataset.
         ds (xr.Dataset): The xarray dataset containing climate data.
+        latitude(float): Latitude for nearest data point.
+        longitude (float): Longitude for nearest data point.
     Returns:
         xr.DataArray: The data array containing the variable values for the specified location.
     """
     try:
+        data = ds[variable].sel(lat=latitude, lon=longitude, method="nearest")
         # Convert temperature from Kelvin to Celsius for specific variables
         if variable in ["tas", "tasmin", "tasmax"]:
             data = data - 273.15
 # Function to extract climate data from forecast datasets and convert to a DataFrame
def get_forecast_data(latitude: float, longitude: float, scenario: str, shading_coef: float) -> pd.DataFrame:
    """
    Extract climate data from the forecast datasets for a given location and convert to a DataFrame.

    The NetCDF files are discovered in the immediate sub-folders of
    ``data/climate_data_<scenario>/``, loaded with get_forecast_datasets, read
    for every entry of VARIABLE_MAPPING through load_data, and finally passed
    through preprocess_forectast_data.
    Args:
        latitude (float): Latitude of the location to extract data for.
        longitude (float): Longitude of the location to extract data for.
        scenario (str): The scenario to extract data for ("moderate" or "pessimist").
        shading_coef (float): Shading fraction in [0, 1]; forwarded to the
            preprocessing step, which scales irradiance by (1 - shading_coef).
    Returns:
        pd.DataFrame: The preprocessed DataFrame containing time series data for
        each variable plus the columns added by preprocess_forectast_data.
    Raises:
        AssertionError: If scenario is not a supported name or shading_coef is
            outside [0, 1].
    """
    assert scenario in ["moderate", "pessimist"]
    assert 0 <= shading_coef <= 1

    # Define the directory to parse
    folder_to_parse = f"data/climate_data_{scenario}/"

    # Retrieve the subfolders and files to parse
    climate_sub_folder = [os.path.join(folder_to_parse, e) for e in os.listdir(folder_to_parse) if
                          os.path.isdir(os.path.join(folder_to_parse, e))]
    climate_sub_files = [os.path.join(e, i) for e in climate_sub_folder for i in os.listdir(e) if i.endswith('.nc')]

    # Load the forecast datasets
    datasets = get_forecast_datasets(climate_sub_files)

    time_series_data = {'time': []}
    for long_name, (title, unit, variable) in VARIABLE_MAPPING.items():
        print(f"Processing {long_name} ({title}, {unit}, {variable})...")
        data = load_data(variable, datasets[long_name], latitude, longitude)
        if data is not None:
            time_series_data['time'] = data.time.values
            column_name = f"{title} ({unit})"
            time_series_data[column_name] = data.values

    forecast_data = pd.DataFrame(time_series_data)
    # Return the preprocessed frame: the previous code rebuilt a raw DataFrame
    # here and silently dropped the ET0 / water-deficit columns.
    return preprocess_forectast_data(forecast_data, latitude, longitude, shading_coef)
def preprocess_forectast_data(df: pd.DataFrame, latitude, longitude, shading_coef=0) -> pd.DataFrame:
    """
    Derive the ET0 inputs, evaporation, daily precipitation and water deficit from raw forecast data.

    Args:
        df (pd.DataFrame): Forecast data holding a 'time' column and the titled
            climate columns read below; it is not modified.
        latitude: Latitude forwarded to compute_et0.
        longitude: Longitude forwarded to compute_et0.
        shading_coef: Fraction in [0, 1]; irradiance is scaled by (1 - shading_coef).
    Returns:
        pd.DataFrame: A copy of df with the added columns.
    """
    assert 0 <= shading_coef <= 1

    result = df.copy()
    result["irradiance"] = result['Surface Downwelling Shortwave Radiation (W/m²)'] * (1 - shading_coef)

    # Short aliases for the titled columns, added in this order
    source_by_alias = {
        "air_temperature_min": 'Daily Minimum Near Surface Air Temperature (°C)',
        "air_temperature_max": 'Daily Maximum Near Surface Air Temperature (°C)',
        # A single humidity series feeds both the min and the max input
        "relative_humidity_min": 'Relative Humidity (%)',
        "relative_humidity_max": 'Relative Humidity (%)',
        "wind_speed": 'Near Surface Wind Speed (m/s)',
    }
    for alias, source in source_by_alias.items():
        result[alias] = result[source]

    # Unparseable timestamps become NaT instead of raising
    timestamps = pd.to_datetime(result['time'], errors='coerce')
    result['time'] = timestamps
    result['day_of_year'] = timestamps.dt.dayofyear

    result['Evaporation (mm/day)'] = compute_et0(result, latitude, longitude)

    # Convert precipitation from kg/m²/s to mm/day (86400 seconds per day)
    result['Precipitation (mm/day)'] = 86400 * result['Precipitation (kg m-2 s-1)']

    # Water Deficit = ET0 - P + M
    evaporation = result['Evaporation (mm/day)']
    precipitation = result['Precipitation (mm/day)']
    soil_moisture = result['Moisture in Upper Portion of Soil Column (kg m-2)']
    result['Water Deficit (mm/day)'] = evaporation - precipitation + soil_moisture

    return result
+# Main processing workflow
def main():
    """
    Fetch the unshaded pessimist-scenario forecast for an example location and print a preview.

    Returns:
        pd.DataFrame: The DataFrame returned by get_forecast_data.
    """
    # Example coordinates
    forecast_data = get_forecast_data(47.0, 5.0, scenario="pessimist", shading_coef=0)

    # Show the first rows, then the available columns
    for view in (forecast_data.head(), forecast_data.columns):
        print(view)
    return forecast_data
 # Run the main function

requirements.txt CHANGED Viewed

@@ -22,3 +22,5 @@ matplotlib
 xarray
 folium
 netcdf4

 xarray
 folium
 netcdf4
+geopy
+geopandas