smart-buildings / src /rtu /RTUPipeline.py
jerin
add separate RTU anomalizer
66977cd
raw
history blame
3.14 kB
import json
import joblib
import pandas as pd
from sklearn.preprocessing import StandardScaler
from pickle import load
import numpy as np
class RTUPipeline:
    """Buffer RTU telemetry messages and build scaled model-input windows.

    Every message passed to :meth:`fit` is appended as one row to
    ``self.df``. Once more than 30 rows are buffered, the last 31 rows of a
    30-sample rolling mean are split into two column groups, scaled with
    ``scaler1`` and ``scaler2`` respectively, and the first 30 rows of each
    scaled group are reshaped into a ``(1, 30, n_features)`` array.

    NOTE(review): the positional column indices in :meth:`transform_window`
    and the ``-2`` / ``-14`` / ``-16`` offsets are laid out for the column
    order produced by the default four RTUs; they are not recomputed for
    other ``rtus`` values.

    NOTE(review): ``self.df`` grows by one row per message and is never
    trimmed, and the rolling mean is recomputed over the whole buffer on
    every call — confirm this is acceptable for long-running streams.
    """

    scaler1 = None  # RTU 1,2
    scaler2 = None  # RTU 3,4

    # Number of samples averaged by the rolling mean and fed to the model.
    _WINDOW_SIZE = 30
    # Rows returned by get_window: the model window plus one extra row.
    _WINDOW_ROWS = _WINDOW_SIZE + 1

    def __init__(self, rtus=(1, 2, 3, 4), scaler1_path=None, scaler2_path=None):
        """Build the column layout and optionally load the two scalers.

        Args:
            rtus: Iterable of RTU numbers; each one is formatted into column
                names as ``rtu_00{n}_...``. Defaults to the four RTUs
                ``(1, 2, 3, 4)``.
            scaler1_path: Optional path handed to ``joblib.load`` for the
                scaler applied to the RTU 1,2 column group.
            scaler2_path: Optional path handed to ``joblib.load`` for the
                scaler applied to the RTU 3,4 column group.
        """
        outputs = [
            "sa_temp",
            "oadmpr_pct",
            "ra_temp",
            "oa_temp",
            "ma_temp",
            "sf_vfd_spd_fbk_tn",
            "rf_vfd_spd_fbk_tn",
        ]
        # Materialise once so a one-shot iterable can be walked twice below.
        rtus = tuple(rtus)

        # Output columns: the shared hot-water column first, then every
        # per-RTU output grouped by RTU.
        self.output_col_names = ["hp_hws_temp"] + [
            f"rtu_00{rtu}_{output}" for rtu in rtus for output in outputs
        ]
        # Input columns: one set-point per RTU followed by the weather feeds.
        self.input_col_names = [f"rtu_00{rtu}_sat_sp_tn" for rtu in rtus] + [
            "air_temp_set_1",
            "air_temp_set_2",
            "dew_point_temperature_set_1d",
            "relative_humidity_set_1",
            "solar_radiation_set_1",
        ]

        # With the default four RTUs these evaluate to 7 and 15, i.e. the
        # input/output column counts of a single two-RTU scaler group.
        self.num_inputs = len(self.input_col_names) - 2
        self.num_outputs = len(self.output_col_names) - 14
        self.column_names = self.output_col_names + self.input_col_names

        if scaler1_path:
            self.scaler1 = self.get_scaler(scaler1_path)
        if scaler2_path:
            self.scaler2 = self.get_scaler(scaler2_path)

        # Rolling buffer of received rows, one column per expected field.
        self.df = pd.DataFrame(columns=self.column_names)

    def get_scaler(self, scaler_path):
        """Load and return the object stored at ``scaler_path`` via joblib."""
        return joblib.load(scaler_path)

    def get_window(self, df):
        """Return the latest smoothed window, or ``None`` if too few rows.

        Args:
            df: DataFrame of buffered rows.

        Returns:
            The last 31 rows of the 30-sample rolling mean of ``df`` (computed
            with ``min_periods=1``) cast to ``float32``, or ``None`` when
            ``df`` has 30 rows or fewer.
        """
        len_df = len(df)
        if len_df <= self._WINDOW_SIZE:
            return None
        smoothed = df.rolling(window=self._WINDOW_SIZE, min_periods=1).mean()
        return smoothed.iloc[len_df - self._WINDOW_ROWS:len_df].astype("float32")

    def transform_window(self, df_window):
        """Scale the two RTU column groups of a window.

        Columns are selected by position. With the default column layout,
        index 0 is ``hp_hws_temp``, 1-14 are the RTU 1,2 outputs, 15-28 the
        RTU 3,4 outputs, 29-30 the RTU 1,2 set-points, 31-32 the RTU 3,4
        set-points and 33-37 the weather columns.

        Args:
            df_window: DataFrame as returned by :meth:`get_window`.

        Returns:
            Tuple ``(scaled_group1, scaled_group2)`` holding the results of
            ``scaler1.transform`` and ``scaler2.transform``.

        Raises:
            AttributeError: If ``scaler1`` or ``scaler2`` is still ``None``
                because no scaler path was supplied.
        """
        shared_tail = list(range(33, 38))
        columns_scaler1 = [0] + list(range(1, 15)) + [29, 30] + shared_tail
        columns_scaler2 = [0] + list(range(15, 29)) + [31, 32] + shared_tail
        scaled1 = self.scaler1.transform(df_window.iloc[:, columns_scaler1])
        scaled2 = self.scaler2.transform(df_window.iloc[:, columns_scaler2])
        return scaled1, scaled2

    def prepare_input(self, df_trans):
        """Reshape the first 30 rows of a scaled window into one batch.

        Args:
            df_trans: Scaled window; it is sliced as ``[:30, :]`` and
                reshaped, so presumably a 2-D NumPy array — confirm against
                the scaler in use.

        Returns:
            Array of shape ``(1, 30, len(self.column_names) - 16)``, which is
            ``(1, 30, 22)`` for the default column layout.
        """
        n_features = len(self.column_names) - 16
        return df_trans[:self._WINDOW_SIZE, :].reshape(
            (1, self._WINDOW_SIZE, n_features)
        )

    def extract_data_from_message(self, message):
        """Append the fields of one message to the buffer and return it.

        Args:
            message: Object whose ``payload`` attribute is a bytes-like JSON
                document containing every name in ``self.column_names``.

        Returns:
            ``self.df`` after the new row has been appended.

        Raises:
            KeyError: If the payload lacks one of the expected columns.
        """
        payload = json.loads(message.payload.decode())
        # Keep only the expected columns; extra payload keys are ignored.
        row = {col: payload[col] for col in self.column_names}
        self.df.loc[len(self.df)] = row
        return self.df

    def fit(self, message):
        """Ingest one message and, when possible, build both model inputs.

        Args:
            message: Message accepted by :meth:`extract_data_from_message`.

        Returns:
            Tuple ``(df_new1, df_trans1, df_new2, df_trans2)`` where
            ``df_trans*`` are the scaled windows and ``df_new*`` their
            reshaped model inputs. All four are ``None`` until more than 30
            rows have been buffered.
        """
        df = self.extract_data_from_message(message)
        df_window = self.get_window(df)
        if df_window is None:
            return None, None, None, None
        df_trans1, df_trans2 = self.transform_window(df_window)
        df_new1 = self.prepare_input(df_trans1)
        df_new2 = self.prepare_input(df_trans2)
        return df_new1, df_trans1, df_new2, df_trans2