Spaces:
Runtime error
Runtime error
File size: 7,410 Bytes
af358f2 ac675c8 af358f2 aa4cf91 ac675c8 af358f2 ac675c8 af358f2 ac675c8 af358f2 aa4cf91 af358f2 aa4cf91 af358f2 aa4cf91 bf051a0 af358f2 ac675c8 bf051a0 af358f2 ac675c8 af358f2 ac675c8 af358f2 aa4cf91 af358f2 ac675c8 aa4cf91 d07468a aa4cf91 bf051a0 ac675c8 aa4cf91 ac675c8 aa4cf91 ac675c8 196cbb2 ac675c8 aa4cf91 ac675c8 8ff070c aa4cf91 ac675c8 aa4cf91 ac675c8 8ff070c aa4cf91 af358f2 ac675c8 af358f2 ac675c8 af358f2 aa4cf91 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 |
import os
import xarray as xr
import pandas as pd
from compute_et0_adjusted import compute_et0
# Mapping of variable names to metadata (title, unit, and NetCDF variable key).
# Keys are the long CMIP-style variable names used elsewhere in this module;
# values are (human-readable title, display unit, short NetCDF variable key).
# Temperatures are listed in °C because load_data converts the raw Kelvin
# values right after extraction; all other units match the raw NetCDF data.
VARIABLE_MAPPING = {
'surface_downwelling_shortwave_radiation': ('Surface Downwelling Shortwave Radiation', 'W/m²', 'rsds'),
'moisture_in_upper_portion_of_soil_column': ('Moisture in Upper Portion of Soil Column', 'kg m-2', 'mrsos'),
'precipitation': ('Precipitation', 'kg m-2 s-1', 'pr'),
'near_surface_relative_humidity': ('Relative Humidity', '%', 'hurs'),
'evaporation_including_sublimation_and_transpiration': ('Evaporation (including sublimation and transpiration)', 'kg m-2 s-1', 'evspsbl'),
'total_runoff': ('Total Runoff', 'kg m-2 s-1', 'mrro'),
'daily_minimum_near_surface_air_temperature': ('Daily Minimum Near Surface Air Temperature', '°C', 'tasmin'),
'daily_maximum_near_surface_air_temperature': ('Daily Maximum Near Surface Air Temperature', '°C', 'tasmax'),
'near_surface_wind_speed': ('Near Surface Wind Speed', 'm/s', 'sfcWind'),
'near_surface_air_temperature': ('Near Surface Air Temperature', '°C', 'tas'),
}
# Extracts one variable's time series at the grid cell nearest a coordinate.
def load_data(variable: str, ds: xr.Dataset, latitude: float, longitude: float) -> xr.DataArray:
    """
    Select a variable's values at the grid point nearest to a location.

    Args:
        variable (str): Short NetCDF variable key to extract (e.g. 'tas', 'pr').
        ds (xr.Dataset): Dataset holding the climate variable.
        latitude (float): Target latitude; the nearest grid point is used.
        longitude (float): Target longitude; the nearest grid point is used.

    Returns:
        xr.DataArray: Values at the selected location, or None when the
        extraction fails for any reason (the error is printed).
    """
    try:
        series = ds[variable].sel(lat=latitude, lon=longitude, method="nearest")
        # Air temperatures arrive in Kelvin; report them in Celsius.
        if variable in ("tas", "tasmin", "tasmax"):
            series = series - 273.15
        return series
    except Exception as err:
        print(f"Error loading {variable}: {err}")
        return None
# Function to load forecast datasets from NetCDF files based on variable mapping
def get_forecast_datasets(climate_sub_files: list) -> dict:
    """
    Get the forecast datasets by loading NetCDF files for each variable.

    Args:
        climate_sub_files (list): List of file paths to the NetCDF files.

    Returns:
        dict: Dictionary with long variable names as keys and xarray datasets
        as values; variables with no matching file are absent from the dict.
    """
    datasets = {}
    for file_path in climate_sub_files:
        filename = os.path.basename(file_path)
        for long_name, (_title, _unit, var_key) in VARIABLE_MAPPING.items():
            if var_key not in filename:
                continue
            if var_key in ("tas", "tasmax", "tasmin"):
                # 'tas' is a prefix of 'tasmin'/'tasmax', so a plain substring
                # test would mis-assign those files. Require the key to appear
                # as a delimited token in the FILENAME ("_tas_" or an "_tas.nc"
                # suffix). BUG FIX: the original compared against a literal
                # placeholder string instead of `filename`, so the first branch
                # never matched real files.
                if f"_{var_key}_" in filename or filename.endswith(f"_{var_key}.nc"):
                    datasets[long_name] = xr.open_dataset(file_path, engine="netcdf4")
            else:
                datasets[long_name] = xr.open_dataset(file_path, engine="netcdf4")
    return datasets
# Function to extract climate data from forecast datasets and convert to a DataFrame
def get_forecast_data(latitude: float, longitude: float, scenario: str, shading_coef: float = 0) -> pd.DataFrame:
    """
    Extract climate data from the forecast datasets for a given location and convert to a DataFrame.

    Args:
        latitude (float): Latitude of the location to extract data for.
        longitude (float): Longitude of the location to extract data for.
        scenario (str): The scenario to extract data for ("moderate" or "pessimist").
        shading_coef (float, optional): Shading coefficient in [0, 1]. Defaults to 0 (no shading).

    Returns:
        pd.DataFrame: A DataFrame containing time series data for each variable,
        with the derived columns added by preprocess_forectast_data.
    """
    assert scenario in ["moderate", "pessimist"]
    assert 0 <= shading_coef <= 1
    # Define the directory to parse
    folder_to_parse = f"data/climate_data_{scenario}/"
    # Retrieve the subfolders and files to parse
    climate_sub_folder = [os.path.join(folder_to_parse, e) for e in os.listdir(folder_to_parse) if
                          os.path.isdir(os.path.join(folder_to_parse, e))]
    climate_sub_files = [os.path.join(e, i) for e in climate_sub_folder for i in os.listdir(e) if i.endswith('.nc')]
    # Load the forecast datasets
    datasets = get_forecast_datasets(climate_sub_files)
    time_series_data = {'time': []}
    for long_name, (title, unit, variable) in VARIABLE_MAPPING.items():
        print(f"Processing {long_name} ({title}, {unit}, {variable})...")
        # ROBUSTNESS FIX: a variable with no matching NetCDF file used to raise
        # KeyError here and abort the whole extraction; skip it instead.
        ds = datasets.get(long_name)
        if ds is None:
            print(f"Warning: no dataset found for {long_name}; skipping.")
            continue
        data = load_data(variable, ds, latitude, longitude)
        if data is not None:
            # All variables share the same time axis, so overwriting is safe.
            time_series_data['time'] = data.time.values
            column_name = f"{title} ({unit})"
            time_series_data[column_name] = data.values
    forecast_data = pd.DataFrame(time_series_data)
    forecast_data = preprocess_forectast_data(forecast_data, latitude, longitude, shading_coef)
    return forecast_data
def preprocess_forectast_data(df: pd.DataFrame, latitude, longitude, shading_coef) -> pd.DataFrame:
    """
    Derive the model-ready columns (irradiance, calendar fields, ET0,
    precipitation in mm/day, and water deficit) from the raw forecast frame.

    Args:
        df (pd.DataFrame): Raw forecast data produced by get_forecast_data.
        latitude: Latitude of the location, forwarded to compute_et0.
        longitude: Longitude of the location, forwarded to compute_et0.
        shading_coef: Shading coefficient in [0, 1]; scales down irradiance.

    Returns:
        pd.DataFrame: A copy of *df* with the derived columns appended.
    """
    assert 0 <= shading_coef <= 1
    out = df.copy()
    # Shortwave radiation reduced by the shading factor gives usable irradiance.
    out["irradiance"] = out['Surface Downwelling Shortwave Radiation (W/m²)'] * (1 - shading_coef)
    # Alias the verbose forecast columns to the short names compute_et0 expects
    # (min/max relative humidity both come from the single humidity column).
    aliases = {
        "air_temperature_min": 'Daily Minimum Near Surface Air Temperature (°C)',
        "air_temperature_max": 'Daily Maximum Near Surface Air Temperature (°C)',
        "relative_humidity_min": 'Relative Humidity (%)',
        "relative_humidity_max": 'Relative Humidity (%)',
        "wind_speed": 'Near Surface Wind Speed (m/s)',
    }
    for short_name, source_column in aliases.items():
        out[short_name] = out[source_column]
    # Convert 'time' to datetime and derive the calendar features.
    out['time'] = pd.to_datetime(out['time'], errors='coerce')
    out['month'] = out['time'].dt.month
    out['day_of_year'] = out['time'].dt.dayofyear
    # Reference evapotranspiration (ET0), floored at zero.
    et0 = compute_et0(out, latitude, longitude)
    out['Evaporation (mm/day)'] = et0.clip(lower=0)
    # kg/m²/s -> mm/day: 1 kg/m² equals 1 mm of water and a day has 86400 s.
    out['Precipitation (mm/day)'] = 86400 * out['Precipitation (kg m-2 s-1)']
    # Water deficit = ET0 - P (the soil-moisture term is currently disabled).
    out['Water Deficit (mm/day)'] = (
        out['Evaporation (mm/day)'] - out['Precipitation (mm/day)']
        # + out['Moisture in Upper Portion of Soil Column (kg m-2)'])
    )
    return out
# Main processing workflow
def main():
    """Run the full pipeline for one example location and print a preview."""
    latitude, longitude = 47.0, 5.0  # Example coordinates
    forecast_data = get_forecast_data(
        latitude,
        longitude,
        scenario="pessimist",
        shading_coef=0,
    )
    # Display the resulting DataFrame
    print(forecast_data.head())
    print(forecast_data.columns)
    return forecast_data


# Run the main function
if __name__ == "__main__":
    main()
|