{ "cells": [ { "cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [], "source": [
"import os\n",
"from datetime import datetime\n",
"from datetime import date\n",
"from pathlib import Path\n",
"\n",
"import matplotlib.pyplot as plt\n",
"# import seaborn as sns\n",
"import numpy as np\n",
"import pandas as pd\n",
"from keras.models import Sequential\n",
"from keras.layers import LSTM, Dense\n",
"from sklearn.model_selection import train_test_split\n",
"from sklearn.preprocessing import MinMaxScaler,StandardScaler\n",
"from keras.callbacks import ModelCheckpoint\n",
"\n",
"# Folder that holds the CSV exports. Set the SMART_BUILDINGS_DATA_DIR environment\n",
"# variable to run the notebook on another machine; the fallback below is the\n",
"# path the notebook was originally written against.\n",
"dataPATH = os.environ.get(\n",
"    \"SMART_BUILDINGS_DATA_DIR\",\n",
"    r\"C:\\Users\\levim\\OneDrive\\Documents\\MastersAI_ES\\TeamProject-5ARIP10\\smart-buildings\\Data\",\n",
")\n",
"\n",
"# Path joins with the separator of the current OS instead of a hard-coded backslash.\n",
"# all_data = pd.read_csv(Path(dataPATH) / \"long_merge.csv\")\n",
"all_data = pd.read_csv(Path(dataPATH) / \"extended_energy_data.csv\")" ] }, { "cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
datehvac_Nhvac_Sair_temp_set_1solar_radiation_set_1
02018-01-01 00:00:00NaNNaN11.6486.70
12018-01-01 00:15:00NaNNaN11.4945.88
22018-01-01 00:30:00NaNNaN11.5951.62
32018-01-01 00:45:00NaNNaN11.4421.43
42018-01-01 01:00:0037.40000219.511.126.45
\n", "
" ], "text/plain": [ " date hvac_N hvac_S air_temp_set_1 \\\n", "0 2018-01-01 00:00:00 NaN NaN 11.64 \n", "1 2018-01-01 00:15:00 NaN NaN 11.49 \n", "2 2018-01-01 00:30:00 NaN NaN 11.59 \n", "3 2018-01-01 00:45:00 NaN NaN 11.44 \n", "4 2018-01-01 01:00:00 37.400002 19.5 11.12 \n", "\n", " solar_radiation_set_1 \n", "0 86.70 \n", "1 45.88 \n", "2 51.62 \n", "3 21.43 \n", "4 6.45 " ] }, "execution_count": 23, "metadata": {}, "output_type": "execute_result" } ], "source": [
"# Prepare the energy data set with extended features\n",
"feature_list = ['date', 'hvac_N', 'hvac_S', 'air_temp_set_1', 'solar_radiation_set_1']\n",
"\n",
"# Build a new frame rather than assigning into all_data[feature_list]: writing a\n",
"# column into that selection raises pandas' SettingWithCopyWarning.\n",
"extended_energy_data = (\n",
"    all_data[feature_list]\n",
"    .assign(date=lambda frame: pd.to_datetime(frame['date']))\n",
"    .set_index('date')\n",
")\n",
"\n",
"# Average onto a regular 15-minute grid; slots without any sample become NaN.\n",
"eed_15m = extended_energy_data.resample('15min').mean().reset_index(drop=False)\n",
"\n",
"window_size = 12*4 # Half a day of 15-minute samples\n",
"# Trailing rolling mean of both HVAC channels. A row is NaN whenever its window\n",
"# is incomplete or contains a NaN.\n",
"eed_15m_avg = eed_15m.assign(\n",
"    hvac_N=eed_15m['hvac_N'].rolling(window=window_size).mean(),\n",
"    hvac_S=eed_15m['hvac_S'].rolling(window=window_size).mean(),\n",
")\n",
"\n",
"eed_15m.head()" ] }, { "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [], "source": [
"# Train on the smoothed series (replace eed_15m_avg with eed_15m for the plain\n",
"# 15-minute means). assign() returns a new frame, so eed_15m_avg itself is not\n",
"# modified and this cell can be re-run safely.\n",
"energy_data = eed_15m_avg.assign(\n",
"    # Convert the date column to datetime\n",
"    date=lambda frame: pd.to_datetime(frame['date'], format = \"%Y-%m-%d %H:%M:%S\"),\n",
"    # Monday = 0 ... Sunday = 6\n",
"    day_of_week=lambda frame: frame['date'].dt.weekday,\n",
")\n",
"\n",
"# Keep 2019-01-21 through 2019-07-25 (both comparison bounds are exclusive)\n",
"energy_day = energy_data['date'].dt.date\n",
"df_filtered = energy_data[(energy_day > date(2019, 1, 20)) & (energy_day < date(2019, 7, 26))]\n",
"\n",
"# Check for NA values in the DataFrame\n",
"if df_filtered.isna().any().any():\n",
"    print(\"There are NA values in the DataFrame columns.\")" ] }, { "cell_type": "code", "execution_count": 25, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[]" ] }, "execution_count": 25, "metadata": {}, "output_type": "execute_result" } ], "source": [
"# Chronological split around one boundary day: earlier days are held out for\n",
"# testing, later days are used for training (the boundary day is in neither).\n",
"# NOTE(review): the stored text of this cell was damaged - the test cut-off\n",
"# date and the traindataset_df assignment were lost. The test cut-off is\n",
"# assumed here to equal the training cut-off; confirm against the author's copy.\n",
"split_date = date(2019, 2, 21)\n",
"filtered_day = df_filtered['date'].dt.date\n",
"testdataset_df = df_filtered[filtered_day < split_date]\n",
"traindataset_df = df_filtered[filtered_day > split_date]\n",
"\n",
"# List the training columns that still contain NaN (expected: none)\n",
"columns_with_na = traindataset_df.columns[traindataset_df.isna().any()].tolist()\n",
"columns_with_na" ] }, { "cell_type": "code", "execution_count": 26, "metadata": {}, "outputs": [], "source": [
"# Always start from the frames, so that re-running this cell never scales\n",
"# arrays that were already scaled by a previous run.\n",
"train_values = traindataset_df.drop(columns=[\"date\"]).to_numpy(dtype='float32')\n",
"test_values = testdataset_df.drop(columns=[\"date\"]).to_numpy(dtype='float32')\n",
"\n",
"# Unscaled range of the two HVAC columns over the test period\n",
"mintest = np.min(test_values[:,0:2])\n",
"maxtest = np.max(test_values[:,0:2])\n",
"\n",
"# Fit the scaling on the training period only, then apply it to both sets\n",
"scaler = MinMaxScaler(feature_range=(0, 1))\n",
"traindataset = scaler.fit_transform(train_values)\n",
"testdataset = scaler.transform(test_values)" ] }, { "cell_type": "code", "execution_count": 35, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Epoch 1/5\n", "225/229 [============================>.] - ETA: 0s - loss: 0.0130\n", "Epoch 1: val_loss improved from inf to 0.01885, saving model to lstm_energy_01.keras\n", "229/229 [==============================] - 8s 16ms/step - loss: 0.0129 - val_loss: 0.0189\n", "Epoch 2/5\n", "229/229 [==============================] - ETA: 0s - loss: 0.0058\n", "Epoch 2: val_loss did not improve from 0.01885\n", "229/229 [==============================] - 3s 11ms/step - loss: 0.0058 - val_loss: 0.0192\n", "Epoch 3/5\n", "225/229 [============================>.] - ETA: 0s - loss: 0.0052\n", "Epoch 3: val_loss improved from 0.01885 to 0.01818, saving model to lstm_energy_01.keras\n", "229/229 [==============================] - 3s 11ms/step - loss: 0.0052 - val_loss: 0.0182\n", "Epoch 4/5\n", "225/229 [============================>.] 
- ETA: 0s - loss: 0.0045\n", "Epoch 4: val_loss did not improve from 0.01818\n", "229/229 [==============================] - 3s 11ms/step - loss: 0.0045 - val_loss: 0.0190\n", "Epoch 5/5\n", "226/229 [============================>.] - ETA: 0s - loss: 0.0041\n", "Epoch 5: val_loss did not improve from 0.01818\n", "229/229 [==============================] - 3s 11ms/step - loss: 0.0041 - val_loss: 0.0186\n" ] }, { "data": { "text/plain": [ "" ] }, "execution_count": 35, "metadata": {}, "output_type": "execute_result" } ], "source": [
"train,test = traindataset,testdataset\n",
"steps_in_past = 3   # history length, in multiples of time_step\n",
"time_step = 4*6     # forecast horizon in 15-minute samples (6 hours)\n",
"no_inputs = 5       # leading columns of the scaled arrays fed to the model\n",
"no_outputs = 2      # leading columns that are predicted (hvac_N, hvac_S)\n",
"\n",
"def create_dataset(dataset,time_step):\n",
"    \"\"\"Slice a scaled (rows, columns) array into supervised learning windows.\n",
"\n",
"    Each sample pairs the time_step * steps_in_past rows before index i with\n",
"    the time_step rows starting at i. Reads the notebook-level settings\n",
"    steps_in_past, no_inputs and no_outputs.\n",
"\n",
"    Returns:\n",
"        x: shape (samples, no_inputs, time_step * steps_in_past); for every\n",
"           input column, its values over the history window.\n",
"        Y: shape (samples, no_outputs * time_step); the time_step future\n",
"           values of column 0, followed by those of column 1, and so on.\n",
"\n",
"    Raises:\n",
"        ValueError: if dataset is too short to form a single sample.\n",
"    \"\"\"\n",
"    lookback = time_step * steps_in_past\n",
"    # The last usable start index is len(dataset) - time_step: the target window\n",
"    # still fits there. (The loop used to stop `lookback` rows before the end,\n",
"    # which discarded valid samples.)\n",
"    starts = range(lookback, len(dataset) - time_step + 1)\n",
"    if len(starts) == 0:\n",
"        raise ValueError(\n",
"            f'need at least {lookback + time_step} rows to build one sample, got {len(dataset)}'\n",
"        )\n",
"    # Transposing puts columns first, giving (column, time) per sample\n",
"    x = np.array([dataset[i - lookback:i, :no_inputs].T for i in starts])\n",
"    Y = np.array([dataset[i:i + time_step, :no_outputs].T for i in starts])\n",
"    Y = Y.reshape(len(starts), time_step * no_outputs)\n",
"    return x, Y\n",
"\n",
"\n",
"X_train, y_train = create_dataset(train, time_step)\n",
"X_test, y_test = create_dataset(test, time_step)\n",
"\n",
"\n",
"# NOTE(review): Keras LSTM layers read their input as (batch, timesteps, features),\n",
"# so the (samples, no_inputs, history) layout of X_train is consumed as no_inputs\n",
"# 'timesteps' of history-length vectors. Confirm that this is intended.\n",
"model = Sequential()\n",
"model.add(LSTM(units=50, return_sequences=True, dropout= 0.2, input_shape=(X_train.shape[1], X_train.shape[2])))\n",
"model.add(LSTM(units=50, dropout= 0.2, return_sequences=True))\n",
"model.add(LSTM(units=time_step*no_outputs))\n",
"model.add(Dense(units=time_step*no_outputs))\n",
"\n",
"model.compile(optimizer='adam', loss='mean_squared_error')\n",
"\n",
"# NOTE(review): the test windows double as validation data, so the saved\n",
"# checkpoint is selected on the same period that is reported as test loss.\n",
"checkpoint_path = \"lstm_energy_01.keras\"\n",
"checkpoint_callback = ModelCheckpoint(filepath=checkpoint_path, monitor='val_loss', verbose=1, save_best_only=True, mode='min')\n",
"model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=5, batch_size=64, verbose=1, callbacks=[checkpoint_callback])" ] }, { "cell_type": "code", "execution_count": 36, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "86/86 [==============================] - 0s 3ms/step - loss: 0.0186\n", "86/86 [==============================] - 1s 3ms/step\n", "Loss: 0.01863059028983116\n" ] } ], "source": [
"loss = model.evaluate(X_test, y_test)\n",
"test_predict1 = model.predict(X_test)\n",
"print(\"Loss: \", loss)\n",
"\n",
"# Converting values back to the original scale\n",
"def to_original_scale(scaled):\n",
"    \"\"\"Undo the fitted `scaler` on (samples, no_outputs * time_step) target blocks.\n",
"\n",
"    Uses the per-column minimum and range that `scaler` learned on the training\n",
"    data. Fitting a fresh MinMaxScaler on the predictions instead would stretch\n",
"    every column to a fixed range and would not recover the original values.\n",
"    \"\"\"\n",
"    blocks = scaled.reshape(-1, no_outputs, time_step)\n",
"    col_min = scaler.data_min_[:no_outputs].reshape(1, no_outputs, 1)\n",
"    col_range = scaler.data_range_[:no_outputs].reshape(1, no_outputs, 1)\n",
"    return (blocks * col_range + col_min).reshape(scaled.shape)\n",
"\n",
"test_predict2 = to_original_scale(test_predict1)\n",
"y_test1 = to_original_scale(y_test)\n" ] }, { "cell_type": "code", "execution_count": 37, "metadata": {}, "outputs": [], "source": [
"%matplotlib qt\n",
"\n",
"# Create a 3x3 grid of subplots\n",
"fig, axes = plt.subplots(3, 3, figsize=(10, 10))\n",
"\n",
"var = 100                           # index of the first test window shown\n",
"hours = np.arange(time_step) / 4    # four 15-minute samples per hour\n",
"# One panel per consecutive test window: first output column (hvac_N),\n",
"# converted back to the original scale so that it matches the axis label\n",
"for i, ax in enumerate(axes.flat):\n",
"    ax.plot(hours, y_test1[var+i,0:time_step], label='Original Testing Data', color='blue')\n",
"    ax.plot(hours, test_predict2[var+i,0:time_step], label='Predicted Testing Data', color='red',alpha=0.8)\n",
"    ax.set_title('Testing Data - Predicted vs Actual')\n",
"    ax.set_xlabel('Time [hours]')\n",
"    ax.set_ylabel('Energy Consumption [kW]')\n",
"    ax.legend()\n",
"\n",
"# Adjust the spacing between subplots\n",
"plt.tight_layout()\n",
"\n",
"# Show the plot\n",
"plt.show()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
"# Autoregressive prediction\n",
"# Roll the model forward over the start of the test period. Every call sees the\n",
"# previous `lookback` rows - the stored values of the other input columns, but\n",
"# HVAC values that may already be predictions - and fills in the next time_step\n",
"# rows of the HVAC columns.\n",
"lookback = time_step * steps_in_past\n",
"n_blocks = 4    # number of consecutive forecasts chained together\n",
"pred_end = lookback + n_blocks * time_step\n",
"assert pred_end <= len(testdataset), 'test period too short for the requested roll-out'\n",
"\n",
"X_pred = testdataset.copy()\n",
"for i in range(lookback, pred_end, time_step):\n",
"    # Same (1, no_inputs, lookback) layout that create_dataset produces\n",
"    xin = X_pred[i-lookback:i, :no_inputs].T[np.newaxis]\n",
"    yhat = model.predict(xin, verbose = 0)\n",
"    # The output vector holds time_step values per target, target after target\n",
"    X_pred[i:i+time_step, :no_outputs] = yhat.reshape(no_outputs, time_step).T" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
"# Plot prediction vs actual for test data\n",
"fig, ax = plt.subplots()\n",
"ax.plot(X_pred[lookback:pred_end,0],':',label='LSTM')\n",
"ax.plot(testdataset[lookback:pred_end,0],'--',label='Actual')\n",
"ax.set_title('Autoregressive roll-out vs actual (scaled hvac_N)')\n",
"ax.set_xlabel('15-minute steps from roll-out start')\n",
"ax.set_ylabel('hvac_N (min-max scaled)')\n",
"ax.legend();" ] } ], "metadata": { "kernelspec": { "display_name": "experiments", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.15" } }, "nbformat": 4, "nbformat_minor": 2 }