import json
from datetime import date, datetime
from pickle import load
from typing import Optional

import joblib
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler


class EnergyPredictionPipeline:
    """Buffer HVAC readings and turn them into a scaled model-input window.

    Readings arrive as data frames holding a ``date`` column and one HVAC
    column per building wing. They are appended to an internal buffer
    (``self.df``). Whenever the newest reading falls exactly on the hour,
    ``fit`` resamples the buffer to hourly means, adds sine encodings of the
    calendar position, scales the result and returns it with a leading axis
    of size 1.
    """

    # Scaler loaded from ``scaler_path``; stays ``None`` when no path is given.
    scaler = None

    # Maps the ``wing`` constructor argument to the HVAC column it selects.
    _WING_COLUMNS = {"north": "hvac_N", "south": "hvac_S"}

    # Format expected when the ``date`` column holds strings.
    _DATE_FORMAT = "%Y-%m-%d %H:%M:%S"

    # Number of trailing hourly rows kept by ``prepare_input`` (one week).
    _WINDOW_HOURS = 24 * 7

    def __init__(
        self,
        scaler_path=None,
        wing="north",
        bootstrap_data: Optional[pd.DataFrame] = None,
    ):
        """Create the pipeline.

        Args:
            scaler_path: Path handed to ``joblib.load``. When falsy, no
                scaler is loaded and ``transform_windows`` cannot be used.
            wing: ``"north"`` or ``"south"``; selects the HVAC column.
            bootstrap_data: Initial history containing at least ``date`` and
                the selected HVAC column. ``None`` starts with an empty
                buffer.

        Raises:
            ValueError: If ``wing`` is not a known wing name.
        """
        if wing not in self._WING_COLUMNS:
            known = ", ".join(sorted(self._WING_COLUMNS))
            raise ValueError(f"Unknown wing {wing!r}; expected one of: {known}")

        if scaler_path:
            self.scaler = self.get_scaler(scaler_path)

        self.input_col_names = ["date", self._WING_COLUMNS[wing]]

        if bootstrap_data is None:
            self.df = pd.DataFrame(columns=self.input_col_names)
        else:
            self.df = bootstrap_data[self.input_col_names]

    def get_scaler(self, scaler_path):
        """Load and return the object stored at ``scaler_path``.

        WARNING: ``joblib.load`` unpickles the file, which can execute
        arbitrary code. Only pass paths to files from a trusted source.
        """
        return joblib.load(scaler_path)

    def transform_windows(self, df):
        """Return ``self.scaler.transform(df)``.

        Raises:
            RuntimeError: If no scaler was loaded at construction time.
        """
        if self.scaler is None:
            raise RuntimeError(
                "No scaler loaded; construct the pipeline with scaler_path."
            )
        return self.scaler.transform(df)

    def add_dimension(self, df):
        """Reshape a 2-D array of shape ``(r, c)`` to ``(1, r, c)``."""
        n_rows, n_cols = df.shape[0], df.shape[1]
        return df.reshape((1, n_rows, n_cols))

    def convert_nan(self, df):
        """Return ``np.nan_to_num(df)`` (NaN becomes 0, inf becomes finite)."""
        return np.nan_to_num(df)

    def date_encoder(self, df):
        """Add sine encodings of the index's weekday, hour and month.

        ``df`` must be indexed by a ``DatetimeIndex``. The columns
        ``day_encoding``, ``hour_encoding`` and ``month_encoding`` are added
        to ``df`` in place, and the same frame is returned.
        """
        stamps = df.index
        two_pi = 2 * np.pi
        # Computed straight from the index so no scratch columns have to be
        # added to, and then dropped from, the caller's frame.
        df["day_encoding"] = np.sin(two_pi * np.asarray(stamps.dayofweek) / 7)
        df["hour_encoding"] = np.sin(two_pi * np.asarray(stamps.hour) / 24)
        df["month_encoding"] = np.sin(two_pi * np.asarray(stamps.month) / 12)
        return df

    def prepare_input(self, df1: pd.DataFrame):
        """Build the hourly feature frame from raw readings.

        Works on a copy of ``df1``: parses ``date``, resamples to 60-minute
        means, appends the calendar encodings, drops the datetime index,
        casts to ``float32`` and keeps the last ``_WINDOW_HOURS`` rows.
        """
        frame = df1.copy()
        frame["date"] = pd.to_datetime(frame["date"])
        hourly = frame.set_index("date").resample("60min").mean()
        hourly = self.date_encoder(hourly)
        features = hourly.reset_index(drop=True).astype("float32")
        return features.iloc[-self._WINDOW_HOURS:]

    def extract_data_from_message(self, df):
        """Append the relevant columns of ``df`` to the buffer and return it.

        NOTE(review): the buffer is never trimmed, although only the last
        ``_WINDOW_HOURS`` hourly rows are consumed downstream; a long-running
        consumer may want to cap it.
        """
        incoming = df[self.input_col_names]
        if self.df.empty:
            # Nothing buffered yet: avoid concatenating with an empty frame,
            # which would degrade the column dtypes.
            self.df = incoming
        else:
            self.df = pd.concat([self.df, incoming], axis=0)
        return self.df

    def get_window(self, df: pd.DataFrame):
        """Return ``df`` when its last ``date`` is exactly on the hour.

        String dates must follow ``_DATE_FORMAT``; datetime-like values are
        accepted as they are. Returns ``None`` when ``df`` is empty or the
        last reading is not at minute 0 and second 0.
        """
        if df.empty:
            return None

        last = df["date"].iloc[-1]
        if isinstance(last, str):
            stamp = datetime.strptime(last, self._DATE_FORMAT)
        else:
            stamp = pd.Timestamp(last)

        # ``and`` rather than ``&``: the bitwise operator binds tighter than
        # ``==``, so the seconds were never actually compared.
        if stamp.minute == 0 and stamp.second == 0:
            return df
        return None

    def fit(self, df: pd.DataFrame):
        """Ingest a message and, on the hour, return the model input.

        Despite the name nothing is fitted here: the message is buffered and,
        if the newest reading is on the hour, the buffer is converted into a
        scaled array of shape ``(1, rows, cols)``.

        Returns:
            The prepared array, or ``None`` when no window is due.
        """
        buffered = self.extract_data_from_message(df)
        window = self.get_window(buffered)
        if window is None:
            return None

        features = self.prepare_input(window)
        scaled = self.transform_windows(features)
        return self.add_dimension(self.convert_nan(scaled))