File size: 7,410 Bytes
af358f2
 
 
 
ac675c8
af358f2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
aa4cf91
ac675c8
af358f2
 
 
 
 
 
ac675c8
 
af358f2
 
 
 
 
ac675c8
af358f2
 
 
 
 
 
 
 
 
aa4cf91
af358f2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
aa4cf91
af358f2
 
 
 
 
 
 
 
 
aa4cf91
bf051a0
af358f2
 
 
 
ac675c8
 
 
bf051a0
af358f2
 
 
 
ac675c8
 
 
 
 
 
 
 
 
 
 
 
 
 
af358f2
 
 
 
ac675c8
af358f2
aa4cf91
af358f2
 
 
 
ac675c8
 
aa4cf91
d07468a
aa4cf91
 
bf051a0
ac675c8
aa4cf91
ac675c8
 
 
 
 
 
 
aa4cf91
 
ac675c8
196cbb2
ac675c8
aa4cf91
 
ac675c8
8ff070c
aa4cf91
 
ac675c8
aa4cf91
 
ac675c8
8ff070c
 
aa4cf91
af358f2
ac675c8
 
 
 
 
 
 
 
 
 
 
 
 
 
af358f2
ac675c8
af358f2
 
aa4cf91
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
import os
import xarray as xr
import pandas as pd

from compute_et0_adjusted import compute_et0

# Mapping of variable names to metadata (title, unit, and NetCDF variable key)
# Keys are CMIP-style long variable names; values are tuples of
# (human-readable title, display unit, short NetCDF variable key).
# NOTE: temperatures are listed in °C although the raw NetCDF values are in
# Kelvin — load_data converts tas/tasmin/tasmax to Celsius after extraction.
VARIABLE_MAPPING = {
    'surface_downwelling_shortwave_radiation': ('Surface Downwelling Shortwave Radiation', 'W/m²', 'rsds'),
    'moisture_in_upper_portion_of_soil_column': ('Moisture in Upper Portion of Soil Column', 'kg m-2', 'mrsos'),
    'precipitation': ('Precipitation', 'kg m-2 s-1', 'pr'),
    'near_surface_relative_humidity': ('Relative Humidity', '%', 'hurs'),
    'evaporation_including_sublimation_and_transpiration': ('Evaporation (including sublimation and transpiration)', 'kg m-2 s-1', 'evspsbl'),
    'total_runoff': ('Total Runoff', 'kg m-2 s-1', 'mrro'),
    'daily_minimum_near_surface_air_temperature': ('Daily Minimum Near Surface Air Temperature', '°C', 'tasmin'),
    'daily_maximum_near_surface_air_temperature': ('Daily Maximum Near Surface Air Temperature', '°C', 'tasmax'),
    'near_surface_wind_speed': ('Near Surface Wind Speed', 'm/s', 'sfcWind'),
    'near_surface_air_temperature': ('Near Surface Air Temperature', '°C', 'tas'),
}


# Function to load data for a given variable from the dataset at the nearest latitude and longitude
def load_data(variable: str, ds: xr.Dataset, latitude: float, longitude: float) -> xr.DataArray:
    """
    Pull one variable's time series from *ds* at the grid cell nearest to a point.

    Args:
        variable (str): Short NetCDF key of the variable to extract (e.g. 'tas').
        ds (xr.Dataset): Climate dataset to read from.
        latitude (float): Latitude of the point of interest.
        longitude (float): Longitude of the point of interest.

    Returns:
        xr.DataArray: Series at the nearest grid cell, or None if extraction failed.
    """
    # Variables stored in Kelvin that must be reported in Celsius.
    kelvin_variables = ("tas", "tasmin", "tasmax")
    try:
        extracted = ds[variable].sel(lat=latitude, lon=longitude, method="nearest")
        if variable in kelvin_variables:
            extracted = extracted - 273.15
        return extracted
    except Exception as e:
        print(f"Error loading {variable}: {e}")
        return None


# Function to load forecast datasets from NetCDF files based on variable mapping
def get_forecast_datasets(climate_sub_files: list) -> dict:
    """
    Get the forecast datasets by loading NetCDF files for each variable.

    Args:
        climate_sub_files (list): List of file paths to the NetCDF files.

    Returns:
        dict: Dictionary with variable long names as keys and xarray datasets as values.
    """
    datasets = {}

    for file_path in climate_sub_files:
        filename = os.path.basename(file_path)
        for long_name, (title, unit, var_key) in VARIABLE_MAPPING.items():
            if var_key in filename:
                if var_key in ["tas", "tasmax", "tasmin"]:
                    # 'tas' is a substring of 'tasmin'/'tasmax', so require the key
                    # to appear as a delimited filename token to avoid mismatches.
                    # BUG FIX: the token was previously tested against a literal
                    # string instead of the filename, so the "_<key>_" form of
                    # NetCDF filenames was never matched.
                    if f"_{var_key}_" in filename or filename.endswith(f"_{var_key}.nc"):
                        datasets[long_name] = xr.open_dataset(file_path, engine="netcdf4")
                else:
                    datasets[long_name] = xr.open_dataset(file_path, engine="netcdf4")

    return datasets


# Function to extract climate data from forecast datasets and convert to a DataFrame
def get_forecast_data(latitude: float, longitude: float, scenario: str, shading_coef: float = 0) -> pd.DataFrame:
    """
    Extract climate data from the forecast datasets for a given location and convert to a DataFrame.

    Args:
        latitude (float): Latitude of the location to extract data for.
        longitude (float): Longitude of the location to extract data for.
        scenario (str): The scenario to extract data for ("moderate" or "pessimist").
        shading_coef (float, optional): Shading coefficient in [0, 1]. Defaults to 0 (no shading).

    Returns:
        pd.DataFrame: A DataFrame containing time series data for each variable.
    """
    assert scenario in ["moderate", "pessimist"]
    assert 0 <= shading_coef <= 1

    # Define the directory to parse
    folder_to_parse = f"data/climate_data_{scenario}/"

    # Retrieve the subfolders and files to parse
    climate_sub_folder = [os.path.join(folder_to_parse, e) for e in os.listdir(folder_to_parse) if
                          os.path.isdir(os.path.join(folder_to_parse, e))]
    climate_sub_files = [os.path.join(e, i) for e in climate_sub_folder for i in os.listdir(e) if i.endswith('.nc')]

    # Load the forecast datasets
    datasets = get_forecast_datasets(climate_sub_files)

    time_series_data = {'time': []}

    for long_name, (title, unit, variable) in VARIABLE_MAPPING.items():
        print(f"Processing {long_name} ({title}, {unit}, {variable})...")

        # ROBUSTNESS FIX: skip variables with no matching NetCDF file instead of
        # crashing with a KeyError on datasets[long_name].
        if long_name not in datasets:
            print(f"Warning: no dataset found for {long_name}, skipping.")
            continue

        data = load_data(variable, datasets[long_name], latitude, longitude)

        if data is not None:
            # All variables are expected to share the same time axis; the last
            # successfully loaded variable defines the 'time' column.
            time_series_data['time'] = data.time.values
            column_name = f"{title} ({unit})"
            time_series_data[column_name] = data.values

    forecast_data = pd.DataFrame(time_series_data)
    forecast_data = preprocess_forectast_data(forecast_data, latitude, longitude, shading_coef)

    return forecast_data


def preprocess_forectast_data(df: pd.DataFrame, latitude, longitude, shading_coef) -> pd.DataFrame:
    """
    Derive the model input columns (ET0, precipitation, water deficit) from the
    raw forecast columns, leaving the input DataFrame untouched.

    Args:
        df (pd.DataFrame): Raw forecast data keyed by display column names.
        latitude: Latitude used for the ET0 computation.
        longitude: Longitude used for the ET0 computation.
        shading_coef: Shading coefficient in [0, 1] applied to irradiance.

    Returns:
        pd.DataFrame: Copy of *df* with derived columns appended.
    """
    assert 0 <= shading_coef <= 1

    out = df.copy()

    # Irradiance is attenuated by the shading coefficient.
    out["irradiance"] = out['Surface Downwelling Shortwave Radiation (W/m²)'] * (1 - shading_coef)

    # Alias raw forecast columns onto the names expected by compute_et0.
    # NOTE: only a single relative-humidity series is available, so it feeds
    # both the min and max inputs.
    aliases = {
        "air_temperature_min": 'Daily Minimum Near Surface Air Temperature (°C)',
        "air_temperature_max": 'Daily Maximum Near Surface Air Temperature (°C)',
        "relative_humidity_min": 'Relative Humidity (%)',
        "relative_humidity_max": 'Relative Humidity (%)',
        "wind_speed": 'Near Surface Wind Speed (m/s)',
    }
    for target, source in aliases.items():
        out[target] = out[source]

    # Convert 'time' to datetime and derive calendar features (Julian day).
    out['time'] = pd.to_datetime(out['time'], errors='coerce')
    out['month'] = out['time'].dt.month
    out['day_of_year'] = out['time'].dt.dayofyear

    # Reference evapotranspiration (ET0), floored at zero.
    out['Evaporation (mm/day)'] = compute_et0(out, latitude, longitude).clip(lower=0)

    # Convert Precipitation from kg/m²/s to mm/day
    out['Precipitation (mm/day)'] = 86400 * out['Precipitation (kg m-2 s-1)']

    # Calculate Water Deficit: Water Deficit = ET0 - P
    # (soil-moisture term currently disabled:
    #  + out['Moisture in Upper Portion of Soil Column (kg m-2)'])
    out['Water Deficit (mm/day)'] = out['Evaporation (mm/day)'] - out['Precipitation (mm/day)']

    return out


# Main processing workflow
def main():
    """Run an example end-to-end extraction and preview the resulting DataFrame."""
    # Example location and configuration for the demo run.
    latitude, longitude = 47.0, 5.0  # Example coordinates
    forecast_data = get_forecast_data(
        latitude,
        longitude,
        scenario="pessimist",
        shading_coef=0,
    )

    # Display the resulting DataFrame
    print(forecast_data.head())
    print(forecast_data.columns)

    return forecast_data


# Run the main function only when executed as a script (not on import)
if __name__ == "__main__":
    main()