File size: 2,600 Bytes
9627b36
6a62ce9
 
 
 
 
 
9627b36
d0f2767
9627b36
d0f2767
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6a62ce9
 
d0f2767
6a62ce9
d0f2767
 
 
 
 
 
 
 
 
9627b36
d0f2767
 
 
9627b36
d0f2767
 
 
6a62ce9
d0f2767
6a62ce9
 
d0f2767
3759ba9
d0f2767
 
6a62ce9
 
3759ba9
6a62ce9
 
d0f2767
 
6a62ce9
 
 
d0f2767
 
 
6a62ce9
 
d0f2767
3759ba9
d0f2767
 
 
 
 
 
6a62ce9
 
d0f2767
3759ba9
 
d0f2767
6a62ce9
 
 
d0f2767
 
 
6a62ce9
 
d0f2767
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
import pandas as pd
from pickle import load
from datetime import datetime, date
from sklearn.preprocessing import StandardScaler
import joblib
import json
import numpy as np


class EnergyPredictionPipeline:
    """Buffer HVAC readings for one wing and build scaled input windows.

    Incoming frames are appended to an internal buffer (``self.df``). Whenever
    the newest buffered reading falls exactly on the hour, ``fit`` resamples
    the buffer to hourly means, adds cyclic date encodings, keeps the last
    week of rows, scales them and returns a 3-D array with a leading axis of
    size one.
    """

    # Scaler object loaded by ``get_scaler``; remains None when no
    # ``scaler_path`` is passed to the constructor.
    scaler = None

    # Reading column used for each supported wing.
    _WING_COLUMNS = {"north": "hvac_N", "south": "hvac_S"}

    # Number of trailing hourly rows kept by ``prepare_input`` (one week).
    _WINDOW_HOURS = 24 * 7

    def __init__(
        self, scaler_path=None, wing="north", bootstrap_data: pd.DataFrame = None
    ):
        """Set up the column selection, the optional scaler and the buffer.

        Args:
            scaler_path: Path of a joblib-serialized scaler. When falsy, no
                scaler is loaded and ``self.scaler`` stays ``None``.
            wing: Either ``"north"`` or ``"south"``; selects the reading
                column (``hvac_N`` / ``hvac_S``).
            bootstrap_data: Initial readings containing at least ``date`` and
                the selected reading column. ``None`` starts with an empty
                buffer.

        Raises:
            ValueError: If ``wing`` is not a supported value.
            KeyError: If ``bootstrap_data`` lacks a required column.
        """
        if scaler_path:
            self.scaler = self.get_scaler(scaler_path)

        # Fail early with a clear message instead of leaving
        # ``input_col_names`` undefined for an unknown wing.
        if wing not in self._WING_COLUMNS:
            raise ValueError(
                f"Unknown wing {wing!r}; expected one of "
                f"{sorted(self._WING_COLUMNS)}"
            )
        self.input_col_names = ["date", self._WING_COLUMNS[wing]]

        if bootstrap_data is None:
            # The signature advertises None as the default, so honour it.
            self.df = pd.DataFrame(columns=self.input_col_names)
        else:
            self.df = bootstrap_data[self.input_col_names]

    def get_scaler(self, scaler_path):
        """Load and return the scaler stored at ``scaler_path``.

        NOTE: ``joblib.load`` unpickles arbitrary Python objects, so only
        point this at files from a trusted source.
        """
        return joblib.load(scaler_path)

    def transform_windows(self, df):
        """Return ``self.scaler.transform(df)``.

        A scaler must have been loaded beforehand (``scaler_path`` given to
        the constructor); otherwise ``self.scaler`` is ``None`` and this
        raises ``AttributeError``.
        """
        return self.scaler.transform(df)

    def add_dimension(self, df):
        """Reshape a 2-D array ``(rows, cols)`` to ``(1, rows, cols)``."""
        n_rows, n_cols = df.shape[0], df.shape[1]
        return df.reshape((1, n_rows, n_cols))

    def convert_nan(self, df):
        """Replace NaN/inf entries using ``numpy.nan_to_num`` defaults."""
        return np.nan_to_num(df)

    def date_encoder(self, df: pd.DataFrame) -> pd.DataFrame:
        """Add sine encodings of weekday, hour and month from the index.

        The frame must have a datetime-like index. Columns ``day_encoding``,
        ``hour_encoding`` and ``month_encoding`` are added in place and the
        same frame is returned.
        """
        stamps = df.index

        # Computed straight from the index so no temporary helper columns
        # have to be created and dropped afterwards.
        df["day_encoding"] = np.sin(2 * np.pi * np.asarray(stamps.dayofweek) / 7)
        df["hour_encoding"] = np.sin(2 * np.pi * np.asarray(stamps.hour) / 24)
        df["month_encoding"] = np.sin(2 * np.pi * np.asarray(stamps.month) / 12)

        return df

    def prepare_input(self, df1: pd.DataFrame) -> pd.DataFrame:
        """Build the hourly, date-encoded feature frame from raw readings.

        Args:
            df1: Readings with a ``date`` column parseable by
                ``pandas.to_datetime``; the caller's frame is not modified.

        Returns:
            A ``float32`` frame with a fresh integer index, holding at most
            the last ``24 * 7`` hourly rows. Hours without readings are NaN
            in the reading column.
        """
        hourly = df1.copy()
        hourly["date"] = pd.to_datetime(hourly["date"])
        hourly = hourly.set_index("date").resample("60min").mean()

        hourly = self.date_encoder(hourly)
        hourly = hourly.reset_index(drop=True).astype("float32")

        return hourly.iloc[-self._WINDOW_HOURS:]

    def extract_data_from_message(self, df):
        """Append the relevant columns of ``df`` to the buffer and return it.

        Only ``self.input_col_names`` are kept from ``df``. The buffer is
        never trimmed here, so it grows with every message.
        """
        new_rows = df[self.input_col_names]

        if self.df.empty:
            # Skip concatenating onto an empty buffer so its placeholder
            # dtypes cannot influence the dtypes of the real data.
            self.df = new_rows
        else:
            self.df = pd.concat([self.df, new_rows], axis=0)

        return self.df

    def get_window(self, df: pd.DataFrame):
        """Return ``df`` if its last ``date`` is exactly on the hour.

        Returns ``None`` when ``df`` is empty or when the minute or second of
        the last timestamp is non-zero.
        """
        if df.empty:
            return None

        # Parsed with pandas (as in prepare_input) so both strings and
        # timestamp objects are accepted.
        last_seen = pd.to_datetime(df["date"].iloc[-1])

        # Logical ``and`` is required: ``a == 0 & b == 0`` parses as
        # ``a == (0 & b) == 0`` and would ignore the seconds entirely.
        if last_seen.minute == 0 and last_seen.second == 0:
            return df
        return None

    def fit(self, df: pd.DataFrame):
        """Buffer ``df`` and, on a full hour, return the model-ready window.

        Returns:
            A 3-D array of shape ``(1, rows, features)`` when the newest
            buffered reading is exactly on the hour, otherwise ``None``.
        """
        buffered = self.extract_data_from_message(df)
        window = self.get_window(buffered)
        if window is None:
            return None

        features = self.prepare_input(window)
        scaled = self.transform_windows(features)
        # NaNs (e.g. hours without readings) are zeroed after scaling.
        cleaned = self.convert_nan(scaled)
        return self.add_dimension(cleaned)