In [None]:
"""
Import necessary libraries and modules.
"""
import pandas as pd 
from datetime import date
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from keras.models import Sequential
from keras.layers import LSTM, Dense
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler,StandardScaler
from keras.callbacks import ModelCheckpoint
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA
from tensorflow.keras.models import load_model


In [None]:
"""
Load the dataset
"""

# Read the merged building dataset from disk.
merged = pd.read_csv('long_merge.csv')

zone = "47"

# (zone ids, roof-top unit, HVAC wing) groups; any zone not listed here
# falls back to rtu_002 / hvac_N.
_ZONE_GROUPS = (
    (("36", "37", "38", "39", "40", "41", "42", "64", "65", "66", "67", "68", "69", "70"), "rtu_001", "hvac_N"),
    (("18", "25", "26", "45", "48", "55", "56", "61"), "rtu_003", "hvac_S"),
    (("16", "17", "21", "22", "23", "24", "46", "47", "51", "52", "53", "54"), "rtu_004", "hvac_S"),
)
for _members, _unit, _side in _ZONE_GROUPS:
    if zone in _members:
        rtu, wing = _unit, _side
        break
else:
    rtu, wing = "rtu_002", "hvac_N"

# Columns that are always kept, regardless of the selected zone.
_plant_columns = [
    "hp_hws_temp",
    "aru_001_cwr_temp",
    "aru_001_cws_fr_gpm",
    "aru_001_cws_temp",
    "aru_001_hwr_temp",
    "aru_001_hws_fr_gpm",
    "aru_001_hws_temp",
]
# Substring match: a column is kept when its name contains the zone id,
# the RTU name or the wing name.
_zone_columns = [
    col for col in merged.columns
    if any(tag in col for tag in (zone, rtu, wing))
]

# NOTE: this name shadows the built-in sorted(); it is kept unchanged in case
# other cells reference it.
sorted = merged[["date"] + _zone_columns + _plant_columns]
sorted

In [None]:
"""
Select the RTU and weather columns, parse the timestamps, and keep rows between May 2018 and May 2020
"""

# RTU name prefixes and weather columns to keep.
# NOTE: `rtu` is rebound here from a single unit name (zone cell above) to a
# list of prefixes.
rtu = ["rtu_003","rtu_004","rtu_001","rtu_002"]
env = ["air_temp_set_1","air_temp_set_2","dew_point_temperature_set_1d","relative_humidity_set_1","solar_radiation_set_1"]
energy_data = merged[
    ["date","hp_hws_temp"]
    + [col for col in merged.columns if any(sub in col for sub in rtu)]
    + env
]

# Drop every column whose name contains one of these markers, in one pass.
# The explicit .copy() makes df_filtered an independent frame, so assigning the
# parsed 'date' column below cannot raise SettingWithCopyWarning or write
# through to energy_data.
excluded_markers = ('Unnamed', 'co2', 'templogger')
kept_columns = [
    col for col in energy_data.columns
    if not any(marker in col for marker in excluded_markers)
]
df_filtered = energy_data[kept_columns].copy()

df_filtered['date'] = pd.to_datetime(df_filtered['date'], format = "%Y-%m-%d %H:%M:%S")

# Keep rows strictly after 2018-05-01 and strictly before 2020-05-01.
row_dates = df_filtered['date'].dt.date
df_filtered = df_filtered[(row_dates > date(2018, 5, 1)) & (row_dates < date(2020, 5, 1))]

# Report per-column NA counts if anything is missing.
if df_filtered.isna().any().any():
    print("There are NA values",df_filtered.isna().sum().tolist())

In [None]:
"""
Extract the relevant parameters
"""


# Units whose signals are modelled. Swap in ("rtu_001", "rtu_002") to select
# the other pair of units with the same signal set.
active_rtus = ("rtu_003", "rtu_004")

# Per-unit signals, in the order they must appear in the frame.
rtu_signals = (
    "sa_temp",
    "oadmpr_pct",
    "ra_temp",
    "oa_temp",
    "ma_temp",
    "sf_vfd_spd_fbk_tn",
    "rf_vfd_spd_fbk_tn",
)

weather_columns = [
    'air_temp_set_1',
    'air_temp_set_2',
    'dew_point_temperature_set_1d',
    'relative_humidity_set_1',
    'solar_radiation_set_1',
]

# Column order matters: later cells address columns by position.
# Layout: date, hp_hws_temp, 7 signals per unit, the supply-air set-points,
# then the weather columns.
selected_columns = (
    ['date', 'hp_hws_temp']
    + [f"{unit}_{signal}" for unit in active_rtus for signal in rtu_signals]
    + [f"{unit}_sat_sp_tn" for unit in active_rtus]
    + weather_columns
)

df_filtered = df_filtered.loc[:, selected_columns]

In [None]:
"""
Split into training and testing sets by date and smooth each set with a rolling mean.
"""

# Remove every row that has at least one missing value.
df_filtered = df_filtered.dropna()

# Chronological split. Rows dated exactly on the split date go to the training
# set; previously both comparisons were strict, so that day's rows were
# silently left out of both sets. The test set is unchanged.
split_date = date(2019, 7, 21)
row_dates = df_filtered.date.dt.date

testdataset_df = df_filtered[row_dates > split_date]

traindataset_df = df_filtered[row_dates <= split_date]

# Smooth each split separately with a 30-row trailing mean (min_periods=1, so
# the first rows average over however many rows are available).
smoothing_window = 30

testdataset = testdataset_df.drop(columns=["date"]).rolling(window=smoothing_window,min_periods=1).mean().values

traindataset = traindataset_df.drop(columns=["date"]).rolling(window=smoothing_window,min_periods=1).mean().values

# List columns that still contain NA (expected to be empty after dropna above).
columns_with_na_train = traindataset_df.columns[traindataset_df.isna().any()].tolist()
columns_with_na_test = testdataset_df.columns[testdataset_df.isna().any()].tolist()
print(columns_with_na_train)
print(columns_with_na_test)

In [None]:

"""
Preprocess the data by scaling
"""

# Cast both splits to float32 before scaling.
traindataset, testdataset = (
    split.astype('float32') for split in (traindataset, testdataset)
)

# Fit the scaler on the training split only, then apply that same fitted
# scaler to both splits.
scaler = StandardScaler()
scaler.fit(traindataset)
traindataset = scaler.transform(traindataset)
testdataset = scaler.transform(testdataset)

In [None]:
"""
Training the model
"""


# Short aliases for the scaled splits.
train = traindataset
test = testdataset

def create_dataset(dataset, time_step, n_features=22, n_targets=15):
    """Slice a 2-D time series into sliding input windows and next-row targets.

    For every start index ``i`` the input window is rows
    ``i .. i + time_step - 1`` of the first ``n_features`` columns, and the
    target is row ``i + time_step`` restricted to the first ``n_targets``
    columns.

    Args:
        dataset: 2-D array-like of shape (n_rows, n_columns).
        time_step: number of consecutive rows in each input window.
        n_features: number of leading columns used as inputs. Defaults to 22,
            the value that used to be hard-coded.
        n_targets: number of leading columns used as targets. Defaults to 15,
            the value that used to be hard-coded.

    Returns:
        Tuple ``(X, Y)`` where ``X`` has shape
        ``(n_samples, time_step, n_features)`` and ``Y`` has shape
        ``(n_samples, n_targets)``, with
        ``n_samples = n_rows - time_step - 1``.

    Raises:
        IndexError: if ``dataset`` has fewer columns than ``n_features`` or
            ``n_targets``.
        ValueError: if ``dataset`` is not 2-D or is too short to produce a
            single sample.
    """
    dataset = np.asarray(dataset)
    if dataset.ndim != 2:
        raise ValueError(f"dataset must be 2-D, got {dataset.ndim} dimension(s)")
    if dataset.shape[1] < max(n_features, n_targets):
        raise IndexError(
            f"dataset has {dataset.shape[1]} columns but "
            f"n_features={n_features}, n_targets={n_targets} were requested"
        )

    # The sample count keeps the previous "- 1": the final possible window is
    # not emitted, so output lengths match the earlier implementation.
    n_samples = len(dataset) - time_step - 1
    if n_samples <= 0:
        raise ValueError(
            f"dataset with {len(dataset)} rows is too short for time_step={time_step}"
        )

    # (n_samples, time_step) matrix of row indices; row i is i, i+1, ..., i+time_step-1.
    window_rows = np.arange(n_samples)[:, None] + np.arange(time_step)[None, :]
    X = dataset[:, :n_features][window_rows]

    # Target for window i is the row right after it; copy so Y does not alias dataset.
    Y = dataset[time_step:time_step + n_samples, :n_targets].copy()
    return X, Y

# Window length: every sample holds 30 consecutive rows.
time_step = 30
X_train, y_train = create_dataset(train, time_step)
X_test, y_test = create_dataset(test, time_step)

# Three stacked LSTM layers followed by a Dense layer with 15 outputs.
window_shape = (X_train.shape[1], X_train.shape[2])
model = Sequential([
    LSTM(units=50, return_sequences=True, input_shape=window_shape),
    LSTM(units=50, return_sequences=True),
    LSTM(units=30),
    Dense(units=15),
])

model.compile(optimizer='adam', loss='mean_squared_error')

# Write a checkpoint whenever the validation loss reaches a new minimum.
checkpoint_path = "lstm_2rtu_smooth_04.keras"  # "lstm_2rtu_smooth_03.keras"--> 3,4 rtu
checkpoint_callback = ModelCheckpoint(
    filepath=checkpoint_path,
    monitor='val_loss',
    verbose=1,
    save_best_only=True,
    mode='min',
)
model.fit(
    X_train,
    y_train,
    validation_data=(X_test, y_test),
    epochs=5,
    batch_size=64,
    verbose=1,
    callbacks=[checkpoint_callback],
)

In [None]:
"""
load the model for prediction
"""

# NOTE(review): this loads the "_03" checkpoint, while the training cell above
# writes "_04" - confirm that loading a previously saved model is intended.
checkpoint_path = "lstm_2rtu_smooth_03.keras"
model = load_model(checkpoint_path)

# Predict on the test windows first, then on the training windows.
test_predict1, train_predict1 = (
    model.predict(windows) for windows in (X_test, X_train)
)

In [None]:
%matplotlib qt
var = 6
plt.plot(testdataset_df['date'][31:], y_test[:,var], label='Original Testing Data')
plt.plot(testdataset_df['date'][31:] ,test_predict1[:,var], label='Predicted Data')

# anomalies = np.where(abs(test_predict1[:,var] - y_test[:,var]) > 0.38)
# plt.scatter(anomalies,test_predict1[anomalies,var], color='black',marker ="o",s=100 )


plt.title('Testing Data - Predicted vs Actual')
plt.xlabel('Time')
plt.ylabel('Value')
plt.legend()
plt.show()

In [None]:
"""
    Reduce the training prediction residuals with PCA, then fit KMeans on the projected residuals
"""

np.random.seed(0)

# Training residuals (prediction minus target) for target columns 1-7 and 8-14.
residuals_1 = train_predict1[:,1:8] - y_train[:,1:8]
residuals_2 = train_predict1[:,8:15] - y_train[:,8:15]

# Number of KMeans clusters.
k = 1

# Project each residual block onto its first two principal components.
pca1 = PCA(n_components=2)
X1 = pca1.fit_transform(residuals_1)
pca2 = PCA(n_components=2)
X2 = pca2.fit_transform(residuals_2)

# One KMeans model per residual block, fitted on the 2-D projections.
kmeans1 = KMeans(n_clusters=k, random_state=10).fit(X1)
kmeans2 = KMeans(n_clusters=k, random_state=10).fit(X2)

# Scatter the first block's projections, coloured by cluster label, and mark
# the cluster centres with a red cross.
labels = kmeans1.labels_
centres_1 = kmeans1.cluster_centers_
plt.scatter(X1[:, 0], X1[:, 1],c=labels, cmap='rainbow')
plt.scatter(centres_1[:, 0], centres_1[:, 1], marker='x', c='red', s=200, linewidths=2)
plt.title('KMeans Clustering')
plt.xlabel('Feature 1')
plt.ylabel('Feature 2')
plt.show()

In [None]:
"""
    Plot the faults after windowing
"""

%matplotlib qt

distance1 = np.linalg.norm((pca1.transform(test_predict1[:,1:8]-y_test[:,1:8]))-kmeans1.cluster_centers_[0], ord=2, axis = 1)
distance2 = np.linalg.norm((pca2.transform(test_predict1[:,8:15]-y_test[:,8:15]))-kmeans2.cluster_centers_[0], ord=2, axis = 1)

plt.plot(testdataset_df['date'][31:] ,y_test[:,7],label='Return air fan speed')
plt.plot(testdataset_df['date'][31:] ,abs(distance1)>1,linewidth=2.5,label='Faults')
plt.plot(testdataset_df['date'][31:] ,pd.Series((distance1)>3.5).rolling(window=60,min_periods=1).mean()==1,linewidth=2.5,label='Faults')

plt.title('RTU-2 Faults')
plt.xlabel('Time')
plt.ylabel('Value')
plt.legend()
plt.show()
