In [98]:
import pandas as pd 
from datetime import datetime 
from datetime import date
import matplotlib.pyplot as plt
# import seaborn as sns
import numpy as np
import pandas as pd
from keras.models import Sequential
from keras.layers import LSTM, Dense
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler,StandardScaler
from keras.callbacks import ModelCheckpoint

# Folder that holds the project's CSV files (absolute path on the author's machine).
dataPATH = r"C:\Users\levim\OneDrive\Documents\MastersAI_ES\TeamProject-5ARIP10\smart-buildings\Data"

# Read long_merge.csv from that folder; the file name is appended with a Windows separator.
long_merge_csv = dataPATH + r"\long_merge.csv"
all_data = pd.read_csv(long_merge_csv)

In [102]:
# Columns of interest from the merged table: the timestamp plus four measurement columns.
feature_list = [
    'date',
    'hvac_N',
    'hvac_S',
    'air_temp_set_1',
    'solar_radiation_set_1',
]

# Narrow the frame to those columns and preview the first rows as the cell output.
extended_energy_data = all_data.loc[:, feature_list]
extended_energy_data.head()

Unnamed: 0,date,hvac_N,hvac_S,air_temp_set_1,solar_radiation_set_1
0,2018-01-01 00:00:00,,,11.64,86.7
1,2018-01-01 00:01:00,,,11.64,86.7
2,2018-01-01 00:02:00,,,11.64,86.7
3,2018-01-01 00:03:00,,,11.64,86.7
4,2018-01-01 00:04:00,,,11.64,86.7


In [None]:
# HVAC table read from the data folder (the file name suggests 1-hour resolution — TODO confirm).
energy_data = pd.read_csv(dataPATH + r"\hvac_data_1h.csv")

# Parse the timestamp strings, then derive the weekday number (pandas: Monday = 0).
energy_data['date'] = pd.to_datetime(energy_data['date'], format="%Y-%m-%d %H:%M:%S")
energy_data['day_of_week'] = energy_data['date'].dt.weekday

# Keep only rows whose calendar day lies strictly between 2019-01-20 and 2019-07-26,
# i.e. 2019-01-21 through 2019-07-25 inclusive (not the whole of 2019).
calendar_day = energy_data.date.dt.date
in_window = (calendar_day > date(2019, 1, 20)) & (calendar_day < date(2019, 7, 26))
df_filtered = energy_data[in_window]

# Report (but do not remove) missing values anywhere in the filtered frame.
has_missing = df_filtered.isna().any().any()
if has_missing:
    print("There are NA values in the DataFrame columns.")

In [None]:
# Date-based hold-out: days before 2019-02-20 become the test set, days after
# 2019-02-21 the training set. Rows dated 02-20 and 02-21 end up in neither subset,
# and the test period precedes the training period.
row_day = df_filtered.date.dt.date
testdataset_df = df_filtered[row_day < date(2019, 2, 20)]
traindataset_df = df_filtered[row_day > date(2019, 2, 21)]

# Plain value arrays without the timestamp column (remaining column order is preserved).
testdataset = testdataset_df.drop("date", axis=1).values
traindataset = traindataset_df.drop("date", axis=1).values

# Training columns that contain at least one missing value; shown as the cell output.
na_flags = traindataset_df.isna().any()
columns_with_na = traindataset_df.columns[na_flags].tolist()
columns_with_na

In [None]:
# Cast both splits to single precision.
traindataset = traindataset.astype(np.float32)
testdataset = testdataset.astype(np.float32)

# Extremes of the unscaled test array, taken over every column at once.
mintest, maxtest = testdataset.min(), testdataset.max()

# Fit the 0-1 scaling on the training rows only, then apply that same mapping to
# both splits so no test statistics enter the fit.
scaler = MinMaxScaler(feature_range=(0, 1))
scaler.fit(traindataset)
traindataset = scaler.transform(traindataset)
testdataset = scaler.transform(testdataset)

In [101]:
# Short aliases for the scaled splits.
train = traindataset
test = testdataset

# Windowing settings read by create_dataset: the history window spans
# time_step * days_in_past rows and each sample predicts time_step rows.
days_in_past = 20
time_step = 1
def create_dataset(dataset, time_step, lookback=None, n_features=3, n_targets=2):
    """Slice a 2-D series into (history window, future targets) training samples.

    A sample anchored at row ``i`` uses rows ``[i - lookback, i)`` of the first
    ``n_features`` columns as input and rows ``[i, i + time_step)`` of the first
    ``n_targets`` columns as output.

    Parameters
    ----------
    dataset : array-like of shape (rows, columns)
        Series to window; rows are consecutive time steps.
    time_step : int
        Number of future rows predicted per sample (>= 1).
    lookback : int, optional
        Number of past rows in each input window. Defaults to
        ``time_step * days_in_past`` using the module-level ``days_in_past``,
        which is what the original implementation always used.
    n_features : int, default 3
        Leading columns used as model inputs.
    n_targets : int, default 2
        Leading columns used as prediction targets.

    Returns
    -------
    X : numpy.ndarray of shape (samples, lookback, n_features)
    Y : numpy.ndarray of shape (samples, n_targets * time_step)
        Laid out target by target: all ``time_step`` values of target 0,
        then all values of target 1, and so on.

    Raises
    ------
    ValueError
        If the arguments are inconsistent or the series is too short to
        produce a single sample.
    """
    dataset = np.asarray(dataset)
    if lookback is None:
        lookback = time_step * days_in_past
    if time_step < 1 or lookback < 1:
        raise ValueError("time_step and lookback must both be at least 1")
    if dataset.ndim != 2 or dataset.shape[1] < max(n_features, n_targets):
        raise ValueError(
            "dataset must be 2-D with at least max(n_features, n_targets) columns"
        )

    # The last usable anchor is the one whose target horizon ends on the final row.
    # The previous bound subtracted the whole look-back length instead of time_step,
    # discarding valid samples at the end of the series.
    anchors = range(lookback, len(dataset) - time_step + 1)
    if len(anchors) == 0:
        raise ValueError(
            "dataset is too short: need at least lookback + time_step rows"
        )

    X = np.stack([dataset[i - lookback:i, :n_features] for i in anchors])
    # Transpose before flattening so each row is grouped per target, matching the
    # (samples, n_targets, time_step) -> (samples, n_targets * time_step) layout.
    Y = np.stack(
        [dataset[i:i + time_step, :n_targets].T.reshape(-1) for i in anchors]
    )
    return X, Y


# Build the supervised windows for both splits.
X_train, y_train = create_dataset(train, time_step)
X_test, y_test = create_dataset(test, time_step)


# Three stacked LSTM layers followed by a dense read-out with time_step*2 units,
# i.e. one unit per column of y_train.
window_shape = X_train.shape[1:]
model = Sequential([
    LSTM(units=50, return_sequences=True, input_shape=window_shape),
    LSTM(units=50, return_sequences=True),
    LSTM(units=30),
    Dense(units=time_step*2),
])

model.compile(optimizer='adam', loss='mean_squared_error')

# Save the model to disk whenever the validation loss reaches a new minimum.
checkpoint_path = "lstm_energy_01.keras"
checkpoint_callback = ModelCheckpoint(
    filepath=checkpoint_path,
    monitor='val_loss',
    verbose=1,
    save_best_only=True,
    mode='min',
)

# NOTE(review): the test windows double as validation data here, so checkpoint
# selection is driven by the same samples that are evaluated afterwards.
model.fit(
    X_train,
    y_train,
    validation_data=(X_test, y_test),
    epochs=5,
    batch_size=64,
    verbose=1,
    callbacks=[checkpoint_callback],
)

Epoch 1/5
Epoch 1: val_loss improved from inf to 0.01717, saving model to lstm_energy_01.keras
Epoch 2/5
Epoch 2: val_loss improved from 0.01717 to 0.01117, saving model to lstm_energy_01.keras
Epoch 3/5
Epoch 3: val_loss improved from 0.01117 to 0.00990, saving model to lstm_energy_01.keras
Epoch 4/5
Epoch 4: val_loss improved from 0.00990 to 0.00889, saving model to lstm_energy_01.keras
Epoch 5/5
Epoch 5: val_loss did not improve from 0.00889


<keras.callbacks.History at 0x1f4931dc790>

In [95]:
# Score the model on the test windows and collect its (scaled) predictions.
loss            = model.evaluate(X_test, y_test)
test_predict1   = model.predict(X_test)
print("Loss: ", loss)

# Converting values back to the original scale.
# MinMaxScaler maps each column as X * scale_ + min_, so the inverse is
# (X_scaled - min_) / scale_. The targets are the leading columns of the scaled
# array and create_dataset lays y out target by target (time_step values each),
# hence each per-column parameter is repeated time_step times.
# The earlier approach fitted a fresh scaler separately on the predictions and on
# the targets, which stretched each to the same range independently and therefore
# did not restore the original units or keep the two comparable.
n_targets       = y_test.shape[1] // time_step
target_min      = np.repeat(scaler.min_[:n_targets], time_step)
target_scale    = np.repeat(scaler.scale_[:n_targets], time_step)
test_predict2   = (test_predict1 - target_min) / target_scale
y_test1         = (y_test - target_min) / target_scale


Loss:  0.01257658563554287


In [96]:
%matplotlib qt

# Create a 3x3 grid of subplots
fig, axes = plt.subplots(3, 3, figsize=(10, 10))

# Loop over the value index
for i, ax in enumerate(axes.flat):
    # Plot your data or perform any other operations
    ax.plot(y_test1[i,0:time_step], label='Original Testing Data', color='blue')
    ax.plot(test_predict2[i,0:time_step], label='Predicted Testing Data', color='red',alpha=0.8)
    # ax.set_title(f'Plot {i+1}')
    ax.set_title('Testing Data - Predicted vs Actual')
    ax.set_xlabel('Time [hours]')
    ax.set_ylabel('Energy Consumption [kW]')     
    ax.legend()

# Adjust the spacing between subplots
plt.tight_layout()

# Show the plot
plt.show()

In [None]:
%matplotlib qt
index = 100


plt.plot(y_test[index,0:24], label='Original Testing Data', color='blue')
plt.plot(test_predict1[index,0:24], label='Predicted Testing Data', color='red',alpha=0.8)


plt.title('Testing Data - Predicted vs Actual')
plt.xlabel('Time [hours]')
plt.ylabel('Energy [kW]')
plt.legend()
plt.show()

In [None]:
# Peek at up to the first 24 target columns of test sample 1 (values are min-max scaled).
y_test[1, :24]