Spaces:
No application file
No application file
from gluonts.dataset.multivariate_grouper import MultivariateGrouper | |
from gluonts.time_feature import time_features_from_frequency_str | |
from datasets import load_dataset | |
from functools import lru_cache | |
import pandas as pd | |
import numpy as np | |
from functools import partial | |
from transformers import InformerConfig, InformerForPrediction | |
# Sampling frequency of the series; used when building `pd.Period` start
# stamps and when deriving the time features.
freq = "1H"
# Number of future time steps the model is configured to predict.
prediction_length = 48
def get_train_test_datasets():
    """Load the ``traffic_hourly`` config of ``monash_tsf`` and return its splits.

    A transform is registered on both splits (via ``set_transform``) that
    replaces every entry of the ``"start"`` column with a ``pd.Period`` built
    at the module-level ``freq``.

    Returns:
        tuple: ``(train_dataset, test_dataset)``.
    """

    def start_to_period(batch, freq):
        # Rewrite the "start" column of the batch in place and hand it back.
        batch["start"] = [pd.Period(stamp, freq) for stamp in batch["start"]]
        return batch

    splits = load_dataset("monash_tsf", "traffic_hourly")
    train_dataset, test_dataset = splits["train"], splits["test"]

    # Train first, then test; each split gets its own partial bound to `freq`.
    for split in (train_dataset, test_dataset):
        split.set_transform(partial(start_to_period, freq=freq))

    return train_dataset, test_dataset
def get_train_test_multivariate_grouper(train_dataset, test_dataset):
    """Create the ``MultivariateGrouper`` instances for the train and test splits.

    The number of variates is taken to be ``len(train_dataset)`` and is used
    as ``max_target_dim`` for both groupers.

    Args:
        train_dataset: Training split; only its length is read here.
        test_dataset: Test split; only its length is read here.

    Returns:
        tuple: ``(train_grouper, test_grouper)``.
    """
    variate_count = len(train_dataset)
    grouper_for_train = MultivariateGrouper(max_target_dim=variate_count)

    # number of rolling test windows
    rolling_windows = len(test_dataset) // variate_count
    grouper_for_test = MultivariateGrouper(
        max_target_dim=variate_count,
        num_test_dates=rolling_windows,
    )
    return grouper_for_train, grouper_for_test
def get_informer_model(num_of_variates, time_features):
    """Build an ``InformerForPrediction`` model from a fresh ``InformerConfig``.

    Args:
        num_of_variates: Number of variates per time step; passed as
            ``input_size``.
        time_features: Collection of time features; only its length is used.

    Returns:
        The ``InformerForPrediction`` instance created from the config.
    """
    hyperparameters = dict(
        # multivariate setting: input_size is the number of variates per time step
        input_size=num_of_variates,
        # forecast horizon comes from the module-level constant
        prediction_length=prediction_length,
        # the context window is twice the forecast horizon
        context_length=2 * prediction_length,
        # lags: the previous step and the value from one week (24 * 7 steps) before
        lags_sequence=[1, 24 * 7],
        # one extra feature on top of the supplied time features
        # (the original note names it "age")
        num_time_features=len(time_features) + 1,
        # informer params
        dropout=0.1,
        encoder_layers=6,
        decoder_layers=4,
        # size the input is projected to, from
        # num_of_variates * len(lags_sequence) + num_time_features
        d_model=64,
    )
    return InformerForPrediction(InformerConfig(**hyperparameters))
def main():
    """Load the data, group it into a multivariate series and build the model.

    Steps:
      1. Load the train/test splits.
      2. Build the multivariate groupers and apply them to both splits.
      3. Print the length of one raw target and the shape of one grouped target.
      4. Derive the time features for ``freq`` and print them.
      5. Instantiate the Informer model.

    The model's variate count is read from the grouped training example, so
    it always matches the data instead of relying on a hard-coded number.
    """
    train_dataset, test_dataset = get_train_test_datasets()
    train_grouper, test_grouper = get_train_test_multivariate_grouper(train_dataset, test_dataset)

    multi_variate_train_dataset = train_grouper(train_dataset)
    # Not read below yet; kept so the test split is grouped like the train split.
    multi_variate_test_dataset = test_grouper(test_dataset)

    multi_variate_train_example = multi_variate_train_dataset[0]
    train_example = train_dataset[0]
    print('train_example["target"].shape =', len(train_example["target"]))
    print('multi_variate_train_example["target"].shape =', multi_variate_train_example["target"].shape)

    time_features = time_features_from_frequency_str(freq)
    print(time_features)

    # The first dimension of the grouped target is the number of variates.
    # Using it (rather than a literal) keeps `input_size` consistent with the
    # `max_target_dim` the groupers were built with.
    num_of_variates = multi_variate_train_example["target"].shape[0]
    informer = get_informer_model(num_of_variates=num_of_variates, time_features=time_features)
# Run the pipeline only when this file is executed as a script.
if __name__ == "__main__":
    main()