{ "cells": [ { "cell_type": "code", "execution_count": 181, "metadata": {}, "outputs": [], "source": [ "import pandas as pd \n", "from datetime import datetime \n", "from datetime import date\n", "import matplotlib.pyplot as plt\n", "import seaborn as sns\n", "import numpy as np\n", "import pandas as pd\n", "from keras.models import Sequential\n", "from keras.layers import LSTM, Dense\n", "from sklearn.model_selection import train_test_split\n", "from sklearn.preprocessing import MinMaxScaler,StandardScaler\n", "from keras.callbacks import ModelCheckpoint\n" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
datezone_047_hw_valvertu_004_sat_sp_tnzone_047_tempzone_047_fan_spdrtu_004_fltrd_sa_flow_tnrtu_004_sa_temprtu_004_pa_static_stpt_tnrtu_004_oa_flow_tnrtu_004_oadmpr_pct...zone_047_heating_spUnnamed: 47_yhvac_Shp_hws_temparu_001_cwr_temparu_001_cws_fr_gpmaru_001_cws_temparu_001_hwr_temparu_001_hws_fr_gpmaru_001_hws_temp
02018-01-01 00:00:00100.069.067.520.09265.60466.10.060.00000028.0...NaNNaNNaN75.3NaNNaNNaNNaNNaNNaN
12018-01-01 00:01:00100.069.067.520.09265.60466.00.066572.09916228.0...NaNNaNNaN75.3NaNNaNNaNNaNNaNNaN
22018-01-01 00:02:00100.069.067.520.09708.24066.10.067628.83254228.0...NaNNaNNaN75.3NaNNaNNaNNaNNaNNaN
32018-01-01 00:03:00100.069.067.520.09611.63866.10.067710.29461728.0...NaNNaNNaN75.3NaNNaNNaNNaNNaNNaN
42018-01-01 00:04:00100.069.067.520.09215.11066.00.067139.18409028.0...NaNNaNNaN75.3NaNNaNNaNNaNNaNNaN
..................................................................
20721492020-12-31 23:58:00100.068.063.220.018884.83464.40.062938.32000023.4...71.069.023.145000123.856.2554.7156.4123.4261.6122.36
20721502020-12-31 23:58:00100.068.063.220.018884.83464.40.062938.32000023.4...71.069.023.145000123.856.2554.7156.4123.4261.6122.36
20721512020-12-31 23:59:00100.068.063.220.019345.50864.30.063154.39000023.4...71.069.023.145000123.856.2554.7156.4123.4261.6122.36
20721522020-12-31 23:59:00100.068.063.220.019345.50864.30.063154.39000023.4...71.069.023.145000123.856.2554.7156.4123.4261.6122.36
20721532021-01-01 00:00:00100.068.063.220.018650.23264.10.063076.27000022.9...71.069.023.788947123.856.2554.7156.4123.4261.6122.36
\n", "

2072154 rows × 30 columns

\n", "
" ], "text/plain": [ " date zone_047_hw_valve rtu_004_sat_sp_tn \\\n", "0 2018-01-01 00:00:00 100.0 69.0 \n", "1 2018-01-01 00:01:00 100.0 69.0 \n", "2 2018-01-01 00:02:00 100.0 69.0 \n", "3 2018-01-01 00:03:00 100.0 69.0 \n", "4 2018-01-01 00:04:00 100.0 69.0 \n", "... ... ... ... \n", "2072149 2020-12-31 23:58:00 100.0 68.0 \n", "2072150 2020-12-31 23:58:00 100.0 68.0 \n", "2072151 2020-12-31 23:59:00 100.0 68.0 \n", "2072152 2020-12-31 23:59:00 100.0 68.0 \n", "2072153 2021-01-01 00:00:00 100.0 68.0 \n", "\n", " zone_047_temp zone_047_fan_spd rtu_004_fltrd_sa_flow_tn \\\n", "0 67.5 20.0 9265.604 \n", "1 67.5 20.0 9265.604 \n", "2 67.5 20.0 9708.240 \n", "3 67.5 20.0 9611.638 \n", "4 67.5 20.0 9215.110 \n", "... ... ... ... \n", "2072149 63.2 20.0 18884.834 \n", "2072150 63.2 20.0 18884.834 \n", "2072151 63.2 20.0 19345.508 \n", "2072152 63.2 20.0 19345.508 \n", "2072153 63.2 20.0 18650.232 \n", "\n", " rtu_004_sa_temp rtu_004_pa_static_stpt_tn rtu_004_oa_flow_tn \\\n", "0 66.1 0.06 0.000000 \n", "1 66.0 0.06 6572.099162 \n", "2 66.1 0.06 7628.832542 \n", "3 66.1 0.06 7710.294617 \n", "4 66.0 0.06 7139.184090 \n", "... ... ... ... \n", "2072149 64.4 0.06 2938.320000 \n", "2072150 64.4 0.06 2938.320000 \n", "2072151 64.3 0.06 3154.390000 \n", "2072152 64.3 0.06 3154.390000 \n", "2072153 64.1 0.06 3076.270000 \n", "\n", " rtu_004_oadmpr_pct ... zone_047_heating_sp Unnamed: 47_y \\\n", "0 28.0 ... NaN NaN \n", "1 28.0 ... NaN NaN \n", "2 28.0 ... NaN NaN \n", "3 28.0 ... NaN NaN \n", "4 28.0 ... NaN NaN \n", "... ... ... ... ... \n", "2072149 23.4 ... 71.0 69.0 \n", "2072150 23.4 ... 71.0 69.0 \n", "2072151 23.4 ... 71.0 69.0 \n", "2072152 23.4 ... 71.0 69.0 \n", "2072153 22.9 ... 71.0 69.0 \n", "\n", " hvac_S hp_hws_temp aru_001_cwr_temp aru_001_cws_fr_gpm \\\n", "0 NaN 75.3 NaN NaN \n", "1 NaN 75.3 NaN NaN \n", "2 NaN 75.3 NaN NaN \n", "3 NaN 75.3 NaN NaN \n", "4 NaN 75.3 NaN NaN \n", "... ... ... ... ... \n", "2072149 23.145000 123.8 56.25 54.71 \n", "2072150 23.145000 123.8 56.25 54.71 \n", "2072151 23.145000 123.8 56.25 54.71 \n", "2072152 23.145000 123.8 56.25 54.71 \n", "2072153 23.788947 123.8 56.25 54.71 \n", "\n", " aru_001_cws_temp aru_001_hwr_temp aru_001_hws_fr_gpm \\\n", "0 NaN NaN NaN \n", "1 NaN NaN NaN \n", "2 NaN NaN NaN \n", "3 NaN NaN NaN \n", "4 NaN NaN NaN \n", "... ... ... ... \n", "2072149 56.4 123.42 61.6 \n", "2072150 56.4 123.42 61.6 \n", "2072151 56.4 123.42 61.6 \n", "2072152 56.4 123.42 61.6 \n", "2072153 56.4 123.42 61.6 \n", "\n", " aru_001_hws_temp \n", "0 NaN \n", "1 NaN \n", "2 NaN \n", "3 NaN \n", "4 NaN \n", "... ... 
\n", "2072149 122.36 \n", "2072150 122.36 \n", "2072151 122.36 \n", "2072152 122.36 \n", "2072153 122.36 \n", "\n", "[2072154 rows x 30 columns]" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "merged = pd.read_csv(r'C:\\Users\\jerin\\Downloads\\lbnlbldg59\\lbnlbldg59\\lbnlbldg59.processed\\LBNLBLDG59\\clean_Bldg59_2018to2020\\clean data\\long_merge.csv')\n", "\n", "zone = \"47\"\n", "\n", "if zone in [\"36\", \"37\", \"38\", \"39\", \"40\", \"41\", \"42\", \"64\", \"65\", \"66\", \"67\", \"68\", \"69\", \"70\"]:\n", " rtu = \"rtu_001\"\n", " wing = \"hvac_N\"\n", "elif zone in [\"18\", \"25\", \"26\", \"45\", \"48\", \"55\", \"56\", \"61\"]:\n", " rtu = \"rtu_003\"\n", " wing = \"hvac_S\"\n", "elif zone in [\"16\", \"17\", \"21\", \"22\", \"23\", \"24\", \"46\", \"47\", \"51\", \"52\", \"53\", \"54\"]:\n", " rtu = \"rtu_004\"\n", " wing = \"hvac_S\"\n", "else:\n", " rtu = \"rtu_002\"\n", " wing = \"hvac_N\"\n", "#merged is the dataframe\n", "sorted = merged[[\"date\"]+[col for col in merged.columns if zone in col or rtu in col or wing in col]+[\"hp_hws_temp\", \"aru_001_cwr_temp\" , \"aru_001_cws_fr_gpm\" ,\"aru_001_cws_temp\",\"aru_001_hwr_temp\" ,\"aru_001_hws_fr_gpm\" ,\"aru_001_hws_temp\"]]\n", "sorted" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "date 0\n", "zone_047_hw_valve 0\n", "rtu_004_sat_sp_tn 0\n", "zone_047_temp 0\n", "zone_047_fan_spd 0\n", "rtu_004_fltrd_sa_flow_tn 0\n", "rtu_004_sa_temp 0\n", "rtu_004_pa_static_stpt_tn 0\n", "rtu_004_oa_flow_tn 0\n", "rtu_004_oadmpr_pct 0\n", "rtu_004_econ_stpt_tn 0\n", "rtu_004_ra_temp 0\n", "rtu_004_oa_temp 0\n", "rtu_004_ma_temp 0\n", "rtu_004_sf_vfd_spd_fbk_tn 0\n", "rtu_004_rf_vfd_spd_fbk_tn 0\n", "rtu_004_fltrd_gnd_lvl_plenum_press_tn 0\n", "rtu_004_fltrd_lvl2_plenum_press_tn 0\n", "zone_047_cooling_sp 0\n", "Unnamed: 47_x 394570\n", "zone_047_heating_sp 0\n", "Unnamed: 47_y 394570\n", "hvac_S 13035\n", "hp_hws_temp 0\n", "aru_001_cwr_temp 524350\n", "aru_001_cws_fr_gpm 524350\n", "aru_001_cws_temp 524350\n", "aru_001_hwr_temp 299165\n", "aru_001_hws_fr_gpm 299165\n", "aru_001_hws_temp 299165\n", "dtype: int64" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "final_df = sorted.copy()\n", "final_df['date'] = pd.to_datetime(final_df['date'], format = \"%Y-%m-%d %H:%M:%S\")\n", "final_df = final_df[ (final_df.date.dt.date >date(2019, 4, 1)) & (final_df.date.dt.date< date(2020, 2, 15))]\n", "final_df.isna().sum()" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "testdataset_df = final_df[(final_df.date.dt.date date(2019, 11, 8))]\n", "\n", "testdataset = testdataset_df[['rtu_004_oa_temp','rtu_004_ra_temp','hp_hws_temp','rtu_004_oa_flow_tn','rtu_004_oadmpr_pct',\n", " 'rtu_004_sat_sp_tn','rtu_004_rf_vfd_spd_fbk_tn','rtu_004_ma_temp','rtu_004_sa_temp','rtu_004_fltrd_sa_flow_tn',\n", " 'rtu_004_sf_vfd_spd_fbk_tn']].values\n", "\n", "\n", "traindataset = traindataset_df[['rtu_004_oa_temp','rtu_004_ra_temp','hp_hws_temp','rtu_004_oa_flow_tn','rtu_004_oadmpr_pct',\n", " 'rtu_004_sat_sp_tn','rtu_004_rf_vfd_spd_fbk_tn','rtu_004_ma_temp','rtu_004_sa_temp','rtu_004_fltrd_sa_flow_tn',\n", " 'rtu_004_sf_vfd_spd_fbk_tn']].values" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "traindataset = traindataset.astype('float32')\n", "testdataset = testdataset.astype('float32')\n", "\n", "\n", "scaler = 
MinMaxScaler(feature_range=(0, 1))\n", "traindataset = scaler.fit_transform(traindataset)\n", "testdataset = scaler.transform(testdataset)" ] }, { "cell_type": "code", "execution_count": 47, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "c:\\Users\\jerin\\anaconda3\\envs\\smartbuilding\\Lib\\site-packages\\keras\\src\\layers\\rnn\\rnn.py:205: UserWarning: Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead.\n", " super().__init__(**kwargs)\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Epoch 1/10\n", "\u001b[1m3220/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 14ms/step - loss: 0.0071\n", "Epoch 1: val_loss improved from inf to 0.01145, saving model to lstm2.keras\n", "\u001b[1m3220/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m77s\u001b[0m 23ms/step - loss: 0.0071 - val_loss: 0.0115\n", "Epoch 2/10\n", "\u001b[1m3217/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m━\u001b[0m \u001b[1m0s\u001b[0m 14ms/step - loss: 0.0013\n", "Epoch 2: val_loss improved from 0.01145 to 0.01144, saving model to lstm2.keras\n", "\u001b[1m3220/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m73s\u001b[0m 23ms/step - loss: 0.0013 - val_loss: 0.0114\n", "Epoch 3/10\n", "\u001b[1m3218/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m━\u001b[0m \u001b[1m0s\u001b[0m 14ms/step - loss: 0.0010\n", "Epoch 3: val_loss improved from 0.01144 to 0.00729, saving model to lstm2.keras\n", "\u001b[1m3220/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m71s\u001b[0m 22ms/step - loss: 0.0010 - val_loss: 0.0073\n", "Epoch 4/10\n", "\u001b[1m3218/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m━\u001b[0m \u001b[1m0s\u001b[0m 14ms/step - loss: 5.5876e-04\n", "Epoch 4: val_loss improved from 0.00729 to 0.00409, saving model to lstm2.keras\n", "\u001b[1m3220/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m73s\u001b[0m 23ms/step - loss: 5.5871e-04 - val_loss: 0.0041\n", "Epoch 5/10\n", "\u001b[1m3218/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m━\u001b[0m \u001b[1m0s\u001b[0m 14ms/step - loss: 3.9261e-04\n", "Epoch 5: val_loss improved from 0.00409 to 0.00386, saving model to lstm2.keras\n", "\u001b[1m3220/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m72s\u001b[0m 22ms/step - loss: 3.9260e-04 - val_loss: 0.0039\n", "Epoch 6/10\n", "\u001b[1m3219/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m━\u001b[0m \u001b[1m0s\u001b[0m 14ms/step - loss: 3.3977e-04\n", "Epoch 6: val_loss did not improve from 0.00386\n", "\u001b[1m3220/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m70s\u001b[0m 22ms/step - loss: 3.3976e-04 - val_loss: 0.0049\n", "Epoch 7/10\n", "\u001b[1m3220/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 14ms/step - loss: 3.0365e-04\n", "Epoch 7: val_loss did not improve from 0.00386\n", "\u001b[1m3220/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m69s\u001b[0m 22ms/step - loss: 3.0364e-04 - val_loss: 0.0052\n", "Epoch 8/10\n", "\u001b[1m3220/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 14ms/step - loss: 
2.7422e-04\n", "Epoch 8: val_loss did not improve from 0.00386\n", "\u001b[1m3220/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m70s\u001b[0m 22ms/step - loss: 2.7422e-04 - val_loss: 0.0052\n", "Epoch 9/10\n", "\u001b[1m3217/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m━\u001b[0m \u001b[1m0s\u001b[0m 14ms/step - loss: 2.5380e-04\n", "Epoch 9: val_loss did not improve from 0.00386\n", "\u001b[1m3220/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m73s\u001b[0m 23ms/step - loss: 2.5379e-04 - val_loss: 0.0058\n", "Epoch 10/10\n", "\u001b[1m3219/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m━\u001b[0m \u001b[1m0s\u001b[0m 14ms/step - loss: 2.3404e-04\n", "Epoch 10: val_loss did not improve from 0.00386\n", "\u001b[1m3220/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m72s\u001b[0m 22ms/step - loss: 2.3403e-04 - val_loss: 0.0099\n" ] }, { "data": { "text/plain": [ "" ] }, "execution_count": 47, "metadata": {}, "output_type": "execute_result" } ], "source": [ "train,test = traindataset,testdataset\n", "\n", "def create_dataset(dataset,time_step):\n", " x1,x2,x3,x4,x5,x6,x7,x8,x9,Y = [],[],[],[],[],[],[],[],[],[]\n", " for i in range(len(dataset)-time_step-1):\n", " x1.append(dataset[i:(i+time_step), 0])\n", " x2.append(dataset[i:(i+time_step), 1])\n", " x3.append(dataset[i:(i+time_step), 2])\n", " x4.append(dataset[i:(i+time_step), 3])\n", " x5.append(dataset[i:(i+time_step), 4])\n", " x6.append(dataset[i:(i+time_step), 5])\n", " x7.append(dataset[i:(i+time_step), 6])\n", " x8.append(dataset[i:(i+time_step), 7])\n", " # x9.append(dataset[i:(i+time_step), 8])\n", " Y.append([dataset[i + time_step, 7]])\n", " x1,x2,x3,x4,x5,x6,x7,x8 = np.array(x1),np.array(x2),np.array(x3), np.array(x4),np.array(x5),np.array(x6),np.array(x7),np.array(x8)#,np.array(x9)\n", " Y = np.reshape(Y,(len(Y),1))\n", " return np.stack([x1,x2,x3,x4,x5,x6,x7,x8],axis=2),Y\n", "\n", "\n", "\n", "\n", "time_step = 30\n", "X_train, y_train = create_dataset(train, time_step)\n", "X_test, y_test = create_dataset(test, time_step)\n", "\n", "\n", "model = Sequential()\n", "model.add(LSTM(units=50, return_sequences=True, input_shape=(X_train.shape[1], X_train.shape[2])))\n", "model.add(LSTM(units=50, return_sequences=True))\n", "model.add(LSTM(units=30))\n", "model.add(Dense(units=1))\n", "\n", "model.compile(optimizer='adam', loss='mean_squared_error')\n", "\n", "checkpoint_path = \"lstm2.keras\"\n", "checkpoint_callback = ModelCheckpoint(filepath=checkpoint_path, monitor='val_loss', verbose=1, save_best_only=True, mode='min')\n", "model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=10, batch_size=64, verbose=1, callbacks=[checkpoint_callback])\n" ] }, { "cell_type": "code", "execution_count": 45, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Epoch 1/5\n", "\u001b[1m3218/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m━\u001b[0m \u001b[1m0s\u001b[0m 19ms/step - loss: 1.8977e-04\n", "Epoch 1: val_loss improved from inf to 0.01131, saving model to lstm2.keras\n", "\u001b[1m3220/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m94s\u001b[0m 29ms/step - loss: 1.8977e-04 - val_loss: 0.0113\n", "Epoch 2/5\n", "\u001b[1m3219/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m━\u001b[0m \u001b[1m0s\u001b[0m 18ms/step - loss: 1.7357e-04\n", "Epoch 2: val_loss did not improve from 0.01131\n", 
"\u001b[1m3220/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m91s\u001b[0m 28ms/step - loss: 1.7358e-04 - val_loss: 0.0123\n", "Epoch 3/5\n", "\u001b[1m3219/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m━\u001b[0m \u001b[1m0s\u001b[0m 18ms/step - loss: 1.6701e-04\n", "Epoch 3: val_loss did not improve from 0.01131\n", "\u001b[1m3220/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m92s\u001b[0m 28ms/step - loss: 1.6701e-04 - val_loss: 0.0127\n", "Epoch 4/5\n", "\u001b[1m3220/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 18ms/step - loss: 1.7043e-04\n", "Epoch 4: val_loss did not improve from 0.01131\n", "\u001b[1m3220/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m91s\u001b[0m 28ms/step - loss: 1.7043e-04 - val_loss: 0.0131\n", "Epoch 5/5\n", "\u001b[1m3218/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m━\u001b[0m \u001b[1m0s\u001b[0m 19ms/step - loss: 1.6319e-04\n", "Epoch 5: val_loss did not improve from 0.01131\n", "\u001b[1m3220/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m104s\u001b[0m 32ms/step - loss: 1.6319e-04 - val_loss: 0.0134\n" ] }, { "data": { "text/plain": [ "" ] }, "execution_count": 45, "metadata": {}, "output_type": "execute_result" } ], "source": [ "checkpoint_callback = ModelCheckpoint(filepath=checkpoint_path, monitor='val_loss', verbose=1, save_best_only=True, mode='min')\n", "model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=5, batch_size=64, verbose=1, callbacks=[checkpoint_callback])" ] }, { "cell_type": "code", "execution_count": 48, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\u001b[1m9900/9900\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m34s\u001b[0m 3ms/step\n" ] } ], "source": [ "# train_predict = model.predict(X_train)\n", "test_predict = model.predict(X_test)" ] }, { "cell_type": "code", "execution_count": 49, "metadata": {}, "outputs": [], "source": [ "%matplotlib qt\n", "#'rtu_004_ma_temp','rtu_004_sa_temp'\n", "var = 0\n", "plt.plot(testdataset_df['date'][31:],y_test, label='Original Testing Data', color='blue')\n", "plt.plot(testdataset_df['date'][31:],test_predict, label='Predicted Testing Data', color='red',alpha=0.8)\n", "# anomalies = np.where(abs(test_predict[:,var] - y_test[:,var]) > 0.38)[0]\n", "# plt.scatter(anomalies,test_predict[anomalies,var], color='black',marker =\"o\",s=100 )\n", "\n", "\n", "plt.title('Testing Data - Predicted vs Actual')\n", "plt.xlabel('Time')\n", "plt.ylabel('Value')\n", "plt.legend()\n", "plt.show()" ] }, { "cell_type": "code", "execution_count": 50, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "WARNING:absl:You are saving your model as an HDF5 file via `model.save()` or `keras.saving.save_model(model)`. This file format is considered legacy. We recommend using instead the native Keras format, e.g. `model.save('my_model.keras')` or `keras.saving.save_model(model, 'my_model.keras')`. 
\n" ] } ], "source": [ "from tensorflow.keras.models import load_model\n", "# model.save(\"MA_temp_model.h5\") \n", "# loaded_model = load_model(\"MA_temp_model.h5\")" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "markdown", "metadata": {}, "source": [ "ENERGY DATA" ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
dateair_temp_set_1air_temp_set_2dew_point_temperature_set_1drelative_humidity_set_1solar_radiation_set_1wifi_third_southwifi_fourth_southhvac_Nhvac_S
02018-01-01 00:00:0011.6411.518.179.0786.7NaNNaNNaNNaN
12018-01-01 00:01:0011.6411.518.179.0786.7NaNNaNNaNNaN
\n", "
" ], "text/plain": [ " date air_temp_set_1 air_temp_set_2 \\\n", "0 2018-01-01 00:00:00 11.64 11.51 \n", "1 2018-01-01 00:01:00 11.64 11.51 \n", "\n", " dew_point_temperature_set_1d relative_humidity_set_1 \\\n", "0 8.1 79.07 \n", "1 8.1 79.07 \n", "\n", " solar_radiation_set_1 wifi_third_south wifi_fourth_south hvac_N hvac_S \n", "0 86.7 NaN NaN NaN NaN \n", "1 86.7 NaN NaN NaN NaN " ] }, "execution_count": 19, "metadata": {}, "output_type": "execute_result" } ], "source": [ "zone = [\"18\", \"25\", \"26\", \"45\", \"48\", \"55\", \"56\", \"61\",\"16\", \"17\", \"21\", \"23\", \"24\", \"46\", \"47\", \"51\", \"52\", \"53\", \"54\"]\n", "rtu = [\"rtu_001\",\"rtu_002\",\"rtu_003\",\"rtu_004\"]\n", "wing = [\"hvac_N\",\"hvac_S\"]\n", "env = [\"air_temp_set_1\",\"air_temp_set_2\",\"dew_point_temperature_set_1d\",\"relative_humidity_set_1\",\"solar_radiation_set_1\"]\n", "wifi=[\"wifi_third_south\",\"wifi_fourth_south\"]\n", "# any(sub in col for sub in zone) or\n", "energy_data = merged[[\"date\"]+[col for col in merged.columns if \n", " any(sub in col for sub in env) or any(sub in col for sub in wifi)]+wing]\n", "df_filtered = energy_data[[col for col in energy_data.columns if 'Unnamed' not in col]]\n", "df_filtered = df_filtered[[col for col in df_filtered.columns if 'co2' not in col]]\n", "df_filtered = df_filtered[[col for col in df_filtered.columns if 'templogger' not in col]]\n", "# df_filtered = df_filtered.dropna()\n", "df_filtered.head(2)" ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "There are NA values in the DataFrame columns.\n" ] } ], "source": [ "df_filtered['date'] = pd.to_datetime(df_filtered['date'], format = \"%Y-%m-%d %H:%M:%S\")\n", "df_filtered = df_filtered[ (df_filtered.date.dt.date >date(2019, 4, 1)) & (df_filtered.date.dt.date< date(2020, 2, 15))]\n", "# df_filtered.isna().sum()\n", "if df_filtered.isna().any().any():\n", " print(\"There are NA values in the DataFrame columns.\")" ] }, { "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[]" ] }, "execution_count": 21, "metadata": {}, "output_type": "execute_result" } ], "source": [ "testdataset_df = df_filtered[(df_filtered.date.dt.date date(2019, 11, 8))]\n", "\n", "testdataset = testdataset_df.drop(columns=[\"date\"]).values\n", "\n", "traindataset = traindataset_df.drop(columns=[\"date\"]).values\n", "\n", "columns_with_na = traindataset_df.columns[traindataset_df.isna().any()].tolist()\n", "columns_with_na" ] }, { "cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [], "source": [ "traindataset = traindataset.astype('float32')\n", "testdataset = testdataset.astype('float32')\n", "\n", "scaler = MinMaxScaler(feature_range=(0, 1))\n", "traindataset = scaler.fit_transform(traindataset)\n", "testdataset = scaler.transform(testdataset)" ] }, { "cell_type": "code", "execution_count": 26, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Epoch 1/3\n", "\u001b[1m3219/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m━\u001b[0m \u001b[1m0s\u001b[0m 14ms/step - loss: 0.0036\n", "Epoch 1: val_loss improved from inf to 0.00068, saving model to lstm_energy_01.keras\n", "\u001b[1m3220/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m65s\u001b[0m 20ms/step - loss: 0.0036 - val_loss: 6.8049e-04\n", "Epoch 2/3\n", "\u001b[1m3219/3220\u001b[0m 
\u001b[32m━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m━\u001b[0m \u001b[1m0s\u001b[0m 14ms/step - loss: 4.8055e-04\n", "Epoch 2: val_loss improved from 0.00068 to 0.00064, saving model to lstm_energy_01.keras\n", "\u001b[1m3220/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m60s\u001b[0m 19ms/step - loss: 4.8055e-04 - val_loss: 6.4225e-04\n", "Epoch 3/3\n", "\u001b[1m3219/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m━\u001b[0m \u001b[1m0s\u001b[0m 14ms/step - loss: 4.6623e-04\n", "Epoch 3: val_loss improved from 0.00064 to 0.00061, saving model to lstm_energy_01.keras\n", "\u001b[1m3220/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m61s\u001b[0m 19ms/step - loss: 4.6622e-04 - val_loss: 6.0579e-04\n" ] }, { "data": { "text/plain": [ "" ] }, "execution_count": 26, "metadata": {}, "output_type": "execute_result" } ], "source": [ "train,test = traindataset,testdataset\n", "\n", "def create_dataset(dataset,time_step):\n", " x = [[] for _ in range(9)] \n", " Y = []\n", " for i in range(len(dataset) - time_step - 1):\n", " for j in range(9):\n", " x[j].append(dataset[i:(i + time_step), j])\n", " Y.append([dataset[i + time_step, 7],dataset[i + time_step, 8]])\n", " x= [np.array(feature_list) for feature_list in x]\n", " Y = np.reshape(Y,(len(Y),2))\n", " return np.stack(x,axis=2),Y\n", "\n", "time_step = 30\n", "X_train, y_train = create_dataset(train, time_step)\n", "X_test, y_test = create_dataset(test, time_step)\n", "\n", "\n", "model = Sequential()\n", "model.add(LSTM(units=50, return_sequences=True, input_shape=(X_train.shape[1], X_train.shape[2])))\n", "model.add(LSTM(units=50, return_sequences=True))\n", "model.add(LSTM(units=30))\n", "model.add(Dense(units=2))\n", "\n", "model.compile(optimizer='adam', loss='mean_squared_error')\n", "\n", "checkpoint_path = \"lstm_energy_01.keras\"\n", "checkpoint_callback = ModelCheckpoint(filepath=checkpoint_path, monitor='val_loss', verbose=1, save_best_only=True, mode='min')\n", "model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=3, batch_size=64, verbose=1, callbacks=[checkpoint_callback])\n" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [], "source": [ "# checkpoint_callback = ModelCheckpoint(filepath=checkpoint_path, monitor='val_loss', verbose=1, save_best_only=True, mode='min')\n", "# model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=10, batch_size=64, verbose=1, callbacks=[checkpoint_callback])" ] }, { "cell_type": "code", "execution_count": 27, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\u001b[1m6345/6345\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m22s\u001b[0m 3ms/step\n" ] } ], "source": [ "test_predict1 = model.predict(X_test)\n", "# train_predict1 = model.predict(X_train)" ] }, { "cell_type": "code", "execution_count": 30, "metadata": {}, "outputs": [], "source": [ "%matplotlib qt\n", "var = 1\n", "plt.plot(testdataset_df['date'][31:],y_test[:,1], label='Original Testing Data', color='blue')\n", "plt.plot(testdataset_df['date'][31:],test_predict1[:,1], label='Predicted Testing Data', color='red',alpha=0.8)\n", "# anomalies = np.where(abs(test_predict[:,var] - y_test[:,var]) > 0.38)[0]\n", "# plt.scatter(anomalies,test_predict[anomalies,var], color='black',marker =\"o\",s=100 )\n", "\n", "\n", "plt.title('Testing Data - Predicted vs Actual')\n", "plt.xlabel('Time')\n", "plt.ylabel('Value')\n", "plt.legend()\n", 
"plt.show()" ] }, { "cell_type": "code", "execution_count": 31, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "WARNING:absl:You are saving your model as an HDF5 file via `model.save()` or `keras.saving.save_model(model)`. This file format is considered legacy. We recommend using instead the native Keras format, e.g. `model.save('my_model.keras')` or `keras.saving.save_model(model, 'my_model.keras')`. \n" ] } ], "source": [ "# from tensorflow.keras.models import load_model\n", "# model.save(\"energy_model_01.h5\") " ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[]" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "%matplotlib qt\n", "plt.plot(df_filtered['date'],df_filtered['hvac_S'])\n", "plt.plot(df_filtered['date'],df_filtered['rtu_003_sf_vfd_spd_fbk_tn'])\n", "plt.plot(df_filtered['date'],df_filtered['zone_025_temp'])" ] }, { "cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[]" ] }, "execution_count": 22, "metadata": {}, "output_type": "execute_result" } ], "source": [ "plt.plot(merged['hvac_S'])\n", "plt.plot(testdataset_df['hvac_S'])\n", "plt.plot(traindataset_df['hvac_S'],'r')" ] }, { "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[]" ] }, "execution_count": 24, "metadata": {}, "output_type": "execute_result" } ], "source": [ "plt.plot(merged['hvac_N'])\n", "plt.plot(testdataset_df['hvac_N'])\n", "plt.plot(traindataset_df['hvac_N'],'r')" ] }, { "cell_type": "code", "execution_count": 32, "metadata": {}, "outputs": [], "source": [ "# merged.columns.to_list()" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "2.16.1\n" ] } ], "source": [] }, { "cell_type": "markdown", "metadata": {}, "source": [ "LSTM 2.0" ] }, { "cell_type": "code", "execution_count": 39, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
datehp_hws_temprtu_003_sat_sp_tnrtu_003_fltrd_sa_flow_tnrtu_003_sa_temprtu_003_pa_static_stpt_tnrtu_003_oa_flow_tnrtu_003_oadmpr_pctrtu_003_econ_stpt_tnrtu_003_ra_temp...rtu_003_rf_vfd_spd_fbk_tnrtu_003_fltrd_gnd_lvl_plenum_press_tnrtu_003_fltrd_lvl2_plenum_press_tnwifi_third_southwifi_fourth_southair_temp_set_1air_temp_set_2dew_point_temperature_set_1drelative_humidity_set_1solar_radiation_set_1
02018-01-01 00:00:0075.365.013558.53965.50.60.00000034.665.067.9...49.90.040.05NaNNaN11.6411.518.179.0786.7
12018-01-01 00:01:0075.365.013592.90965.60.65992.05957234.665.067.9...49.40.040.04NaNNaN11.6411.518.179.0786.7
\n", "

2 rows × 23 columns

\n", "
" ], "text/plain": [ " date hp_hws_temp rtu_003_sat_sp_tn \\\n", "0 2018-01-01 00:00:00 75.3 65.0 \n", "1 2018-01-01 00:01:00 75.3 65.0 \n", "\n", " rtu_003_fltrd_sa_flow_tn rtu_003_sa_temp rtu_003_pa_static_stpt_tn \\\n", "0 13558.539 65.5 0.6 \n", "1 13592.909 65.6 0.6 \n", "\n", " rtu_003_oa_flow_tn rtu_003_oadmpr_pct rtu_003_econ_stpt_tn \\\n", "0 0.000000 34.6 65.0 \n", "1 5992.059572 34.6 65.0 \n", "\n", " rtu_003_ra_temp ... rtu_003_rf_vfd_spd_fbk_tn \\\n", "0 67.9 ... 49.9 \n", "1 67.9 ... 49.4 \n", "\n", " rtu_003_fltrd_gnd_lvl_plenum_press_tn rtu_003_fltrd_lvl2_plenum_press_tn \\\n", "0 0.04 0.05 \n", "1 0.04 0.04 \n", "\n", " wifi_third_south wifi_fourth_south air_temp_set_1 air_temp_set_2 \\\n", "0 NaN NaN 11.64 11.51 \n", "1 NaN NaN 11.64 11.51 \n", "\n", " dew_point_temperature_set_1d relative_humidity_set_1 \\\n", "0 8.1 79.07 \n", "1 8.1 79.07 \n", "\n", " solar_radiation_set_1 \n", "0 86.7 \n", "1 86.7 \n", "\n", "[2 rows x 23 columns]" ] }, "execution_count": 39, "metadata": {}, "output_type": "execute_result" } ], "source": [ "rtu = [\"rtu_003\"]\n", "# wing = [\"hvac_N\",\"hvac_S\"]\n", "env = [\"air_temp_set_1\",\"air_temp_set_2\",\"dew_point_temperature_set_1d\",\"relative_humidity_set_1\",\"solar_radiation_set_1\"]\n", "wifi=[\"wifi_third_south\",\"wifi_fourth_south\"]\n", "[\"rtu_003_ma_temp\",]\n", "# any(sub in col for sub in zone) or\n", "energy_data = merged[[\"date\",\"hp_hws_temp\"]+[col for col in merged.columns if \n", " any(sub in col for sub in rtu) or any(sub in col for sub in wifi)]+env]\n", "df_filtered = energy_data[[col for col in energy_data.columns if 'Unnamed' not in col]]\n", "df_filtered = df_filtered[[col for col in df_filtered.columns if 'co2' not in col]]\n", "df_filtered = df_filtered[[col for col in df_filtered.columns if 'templogger' not in col]]\n", "# df_filtered = df_filtered.dropna()\n", "df_filtered.head(2)" ] }, { "cell_type": "code", "execution_count": 40, "metadata": {}, "outputs": [], "source": [ "df_filtered['date'] = pd.to_datetime(df_filtered['date'], format = \"%Y-%m-%d %H:%M:%S\")\n", "df_filtered = df_filtered[ (df_filtered.date.dt.date >date(2019, 4, 1)) & (df_filtered.date.dt.date< date(2020, 2, 15))]\n", "# df_filtered.isna().sum()\n", "if df_filtered.isna().any().any():\n", " print(\"There are NA values in the DataFrame columns.\")" ] }, { "cell_type": "code", "execution_count": 68, "metadata": {}, "outputs": [], "source": [ "df_filtered = df_filtered.loc[:,['date','hp_hws_temp',\n", " 'rtu_003_sa_temp',\n", " 'rtu_003_oadmpr_pct',\n", " 'rtu_003_ra_temp',\n", " 'rtu_003_oa_temp',\n", " 'rtu_003_ma_temp',\n", " 'rtu_003_sf_vfd_spd_fbk_tn',\n", " 'rtu_003_rf_vfd_spd_fbk_tn','wifi_third_south',\n", " 'wifi_fourth_south',\n", " 'air_temp_set_1',\n", " 'air_temp_set_2',\n", " 'dew_point_temperature_set_1d',\n", " 'relative_humidity_set_1',\n", " 'solar_radiation_set_1']]" ] }, { "cell_type": "code", "execution_count": 188, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[]" ] }, "execution_count": 188, "metadata": {}, "output_type": "execute_result" } ], "source": [ "testdataset_df = df_filtered[(df_filtered.date.dt.date date(2019, 11, 8))]\n", "# .ewm(com = 1000,adjust=True).mean()\n", "testdataset = testdataset_df.drop(columns=[\"date\"]).values\n", "\n", "traindataset = traindataset_df.drop(columns=[\"date\"]).values\n", "\n", "columns_with_na = traindataset_df.columns[traindataset_df.isna().any()].tolist()\n", "columns_with_na" ] }, { "cell_type": "code", "execution_count": 189, "metadata": {}, "outputs": [], 
"source": [ "traindataset = traindataset.astype('float32')\n", "testdataset = testdataset.astype('float32')\n", "\n", "scaler = StandardScaler()\n", "traindataset = scaler.fit_transform(traindataset)\n", "testdataset = scaler.transform(testdataset)" ] }, { "cell_type": "code", "execution_count": 191, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Epoch 1/5\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "c:\\Users\\jerin\\anaconda3\\envs\\smartbuilding\\Lib\\site-packages\\keras\\src\\layers\\rnn\\rnn.py:205: UserWarning: Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead.\n", " super().__init__(**kwargs)\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "\u001b[1m3217/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m━\u001b[0m \u001b[1m0s\u001b[0m 14ms/step - loss: 0.1413\n", "Epoch 1: val_loss improved from inf to 0.52256, saving model to lstm_smooth_01.keras\n", "\u001b[1m3220/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m66s\u001b[0m 20ms/step - loss: 0.1412 - val_loss: 0.5226\n", "Epoch 2/5\n", "\u001b[1m3217/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m━\u001b[0m \u001b[1m0s\u001b[0m 15ms/step - loss: 0.0393\n", "Epoch 2: val_loss improved from 0.52256 to 0.50228, saving model to lstm_smooth_01.keras\n", "\u001b[1m3220/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m65s\u001b[0m 20ms/step - loss: 0.0393 - val_loss: 0.5023\n", "Epoch 3/5\n", "\u001b[1m3220/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 15ms/step - loss: 0.0347\n", "Epoch 3: val_loss improved from 0.50228 to 0.48711, saving model to lstm_smooth_01.keras\n", "\u001b[1m3220/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m65s\u001b[0m 20ms/step - loss: 0.0347 - val_loss: 0.4871\n", "Epoch 4/5\n", "\u001b[1m3217/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m━\u001b[0m \u001b[1m0s\u001b[0m 15ms/step - loss: 0.0319\n", "Epoch 4: val_loss did not improve from 0.48711\n", "\u001b[1m3220/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m66s\u001b[0m 20ms/step - loss: 0.0319 - val_loss: 0.4958\n", "Epoch 5/5\n", "\u001b[1m3218/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m━\u001b[0m \u001b[1m0s\u001b[0m 15ms/step - loss: 0.0303\n", "Epoch 5: val_loss did not improve from 0.48711\n", "\u001b[1m3220/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m67s\u001b[0m 21ms/step - loss: 0.0303 - val_loss: 0.5026\n" ] }, { "data": { "text/plain": [ "" ] }, "execution_count": 191, "metadata": {}, "output_type": "execute_result" } ], "source": [ "train,test = traindataset,testdataset\n", "\n", "def create_dataset(dataset,time_step):\n", " x = [[] for _ in range(15)] \n", " Y = []\n", " for i in range(len(dataset) - time_step - 1):\n", " for j in range(15):\n", " x[j].append(dataset[i:(i + time_step), j])\n", " Y.append([dataset[i + time_step, 0],dataset[i + time_step, 1],dataset[i + time_step, 2],dataset[i + time_step, 3],dataset[i + time_step, 4],dataset[i + time_step, 5],\n", " dataset[i + time_step, 6],dataset[i + time_step, 7]])\n", " x= [np.array(feature_list) for feature_list in x]\n", " Y = np.reshape(Y,(len(Y),8))\n", " return np.stack(x,axis=2),Y\n", "\n", "time_step = 30\n", 
"X_train, y_train = create_dataset(train, time_step)\n", "X_test, y_test = create_dataset(test, time_step)\n", "\n", "\n", "model = Sequential()\n", "model.add(LSTM(units=50, return_sequences=True, input_shape=(X_train.shape[1], X_train.shape[2])))\n", "model.add(LSTM(units=50, return_sequences=True))\n", "model.add(LSTM(units=30))\n", "model.add(Dense(units=8))\n", "\n", "model.compile(optimizer='adam', loss='mean_squared_error')\n", "\n", "checkpoint_path = \"lstm_smooth_01.keras\"\n", "checkpoint_callback = ModelCheckpoint(filepath=checkpoint_path, monitor='val_loss', verbose=1, save_best_only=True, mode='min')\n", "model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=5, batch_size=64, verbose=1, callbacks=[checkpoint_callback])" ] }, { "cell_type": "code", "execution_count": 192, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\u001b[1m6345/6345\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m23s\u001b[0m 4ms/step\n" ] } ], "source": [ "test_predict1 = model.predict(X_test)" ] }, { "cell_type": "code", "execution_count": 193, "metadata": {}, "outputs": [], "source": [ "%matplotlib qt\n", "var = 0\n", "plt.plot(y_test[:,var], label='Original Testing Data', color='blue')\n", "plt.plot(test_predict1[:,var], label='Predicted Testing Data', color='red',alpha=0.8)\n", "anomalies = np.where(abs(test_predict1[:,var] - y_test[:,var]) > 0.38)[var]\n", "plt.scatter(anomalies,test_predict1[anomalies,var], color='black',marker =\"o\",s=100 )\n", "\n", "\n", "plt.title('Testing Data - Predicted vs Actual')\n", "plt.xlabel('Time')\n", "plt.ylabel('Value')\n", "plt.legend()\n", "plt.show()" ] }, { "cell_type": "code", "execution_count": 176, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[]" ] }, "execution_count": 176, "metadata": {}, "output_type": "execute_result" } ], "source": [ "var = 0\n", "plt.plot((test_predict1 - y_test)[:,var])" ] }, { "cell_type": "code", "execution_count": 72, "metadata": {}, "outputs": [], "source": [ "params = ['hp_hws_temp',\n", " 'rtu_003_sa_temp',\n", " 'rtu_003_oadmpr_pct',\n", " 'rtu_003_ra_temp',\n", " 'rtu_003_oa_temp',\n", " 'rtu_003_ma_temp',\n", " 'rtu_003_sf_vfd_spd_fbk_tn',\n", " 'rtu_003_rf_vfd_spd_fbk_tn','wifi_third_south',\n", " 'wifi_fourth_south',\n", " 'air_temp_set_1',\n", " 'air_temp_set_2',\n", " 'dew_point_temperature_set_1d',\n", " 'relative_humidity_set_1',\n", " 'solar_radiation_set_1']\n", "\n", "idx_2_params = {}\n", "for i, param in enumerate(params):\n", " idx_2_params[i] = param" ] }, { "cell_type": "code", "execution_count": 73, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{0: 'hp_hws_temp',\n", " 1: 'rtu_003_sa_temp',\n", " 2: 'rtu_003_oadmpr_pct',\n", " 3: 'rtu_003_ra_temp',\n", " 4: 'rtu_003_oa_temp',\n", " 5: 'rtu_003_ma_temp',\n", " 6: 'rtu_003_sf_vfd_spd_fbk_tn',\n", " 7: 'rtu_003_rf_vfd_spd_fbk_tn',\n", " 8: 'wifi_third_south',\n", " 9: 'wifi_fourth_south',\n", " 10: 'air_temp_set_1',\n", " 11: 'air_temp_set_2',\n", " 12: 'dew_point_temperature_set_1d',\n", " 13: 'relative_humidity_set_1',\n", " 14: 'solar_radiation_set_1'}" ] }, "execution_count": 73, "metadata": {}, "output_type": "execute_result" } ], "source": [ "idx_2_params" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "KMEANS" ] }, { "cell_type": "code", "execution_count": 194, "metadata": {}, "outputs": [], "source": [ "from sklearn.cluster import KMeans\n", "import numpy as np\n", "import matplotlib.pyplot as plt\n", "from sklearn.decomposition import 
PCA\n", "# Generating random data for demonstration\n", "np.random.seed(0)\n", "X = test_predict1 - y_test\n", "\n", "pca = PCA(n_components=2)\n", "X = pca.fit_transform(X)\n", "\n", "\n", "k = 3\n", "\n", "kmeans = KMeans(n_clusters=k)\n", "\n", "kmeans.fit(X)\n", "\n", "# Getting the cluster centers and labels\n", "centroids = kmeans.cluster_centers_\n", "labels = kmeans.labels_\n", "\n", "# Plotting the data points and cluster centers\n", "plt.scatter(X[:, 0], X[:, 1], c=labels, cmap='viridis', alpha=0.5, edgecolors='k')\n", "plt.scatter(centroids[:, 0], centroids[:, 1], marker='x', c='red', s=200, linewidths=2)\n", "plt.title('KMeans Clustering')\n", "plt.xlabel('Feature 1')\n", "plt.ylabel('Feature 2')\n", "plt.show()\n" ] }, { "cell_type": "code", "execution_count": 109, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[]" ] }, "execution_count": 109, "metadata": {}, "output_type": "execute_result" } ], "source": [ "plt.plot((test_predict1 - y_test)[:,2])" ] }, { "cell_type": "code", "execution_count": 77, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([[ 5.8607887e-02, -2.4713947e-01, 2.4978706e-01, -7.8289807e-01,\n", " -2.0218764e-01, -2.8860569e-01, 2.7817219e-01, 2.4209845e-01],\n", " [-2.6845999e-02, 1.2596852e-01, 9.6294099e-01, 2.0099232e-01,\n", " 3.3391420e-02, 7.7613303e-04, -7.1204931e-02, -9.7836025e-02]],\n", " dtype=float32)" ] }, "execution_count": 77, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pca.components_" ] }, { "cell_type": "code", "execution_count": 204, "metadata": {}, "outputs": [], "source": [ "k = 60\n", "X= test_predict1 - y_test\n", "processed_data = []\n", "feat_df = pd.DataFrame(columns=[\"mean\",\"std\",])\n", "for i in range(0,len(X), 30 ):\n", " mean = X[i:i+k].mean(axis = 0)\n", " std = X[i:i+k].std(axis = 0)\n", " max = X[i:i+k].max(axis = 0)\n", " min = X[i:i+k].min(axis = 0)\n", " iqr = np.percentile(X[i:i+k], 75, axis=0) - np.percentile(X[i:i+k], 25,axis=0)\n", " data = np.concatenate([mean, std, max, min, iqr])\n", " processed_data.append([data])\n", "processed_data = np.concatenate(processed_data,axis=0) " ] }, { "cell_type": "code", "execution_count": 197, "metadata": {}, "outputs": [], "source": [ "X = processed_data\n", "\n", "pca = PCA(n_components=2)\n", "X = pca.fit_transform(X)\n", "\n", "\n", "k = 4\n", "\n", "kmeans = KMeans(n_clusters=k)\n", "\n", "kmeans.fit(X)\n", "\n", "# Getting the cluster centers and labels\n", "centroids = kmeans.cluster_centers_\n", "labels = kmeans.labels_\n", "\n", "# Plotting the data points and cluster centers\n", "plt.scatter(X[:, 0], X[:, 1], c=labels, cmap='viridis', alpha=0.5)\n", "plt.scatter(centroids[:, 0], centroids[:, 1], marker='x', c='red', s=200, linewidths=2)\n", "plt.title('KMeans Clustering')\n", "plt.xlabel('Feature 1')\n", "plt.ylabel('Feature 2')\n", "plt.show()\n" ] }, { "cell_type": "code", "execution_count": 167, "metadata": {}, "outputs": [], "source": [ "\n", "dd = df_filtered.drop(columns=[\"date\"],inplace=False)\n", "dg = dd.ewm(com = 1000,adjust=True).mean()" ] }, { "cell_type": "code", "execution_count": 168, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
datehp_hws_temprtu_003_sa_temprtu_003_oadmpr_pctrtu_003_ra_temprtu_003_oa_temprtu_003_ma_temprtu_003_sf_vfd_spd_fbk_tnrtu_003_rf_vfd_spd_fbk_tnwifi_third_southwifi_fourth_southair_temp_set_1air_temp_set_2dew_point_temperature_set_1drelative_humidity_set_1solar_radiation_set_1
5558452019-04-02 00:00:00120.766.784.472.259.266.779.653.734.031.015.6714.9211.7777.80147.1
5558462019-04-02 00:01:00120.465.885.472.259.565.078.154.434.031.015.6714.9211.7777.80147.1
5558472019-04-02 00:02:00120.165.166.272.159.464.078.060.134.031.015.6714.9211.7777.80147.1
5558482019-04-02 00:03:00119.664.956.072.259.465.779.155.534.031.015.6714.9211.7777.80147.1
5558492019-04-02 00:04:00119.365.554.672.059.267.175.253.134.031.015.6714.9211.7777.80147.1
...................................................
10801902020-02-14 23:57:00121.967.352.873.263.569.280.961.30.00.016.4213.936.9353.66347.9
10801912020-02-14 23:58:00122.769.264.873.363.470.081.053.80.00.016.4213.936.9353.66347.9
10801922020-02-14 23:58:00122.769.264.873.363.470.081.053.80.00.016.4213.936.9353.66347.9
10801932020-02-14 23:59:00122.968.780.873.363.167.382.260.10.00.016.4213.936.9353.66347.9
10801942020-02-14 23:59:00122.968.780.873.363.167.382.260.10.00.016.4213.936.9353.66347.9
\n", "

524350 rows × 16 columns

\n", "
" ], "text/plain": [ " date hp_hws_temp rtu_003_sa_temp rtu_003_oadmpr_pct \\\n", "555845 2019-04-02 00:00:00 120.7 66.7 84.4 \n", "555846 2019-04-02 00:01:00 120.4 65.8 85.4 \n", "555847 2019-04-02 00:02:00 120.1 65.1 66.2 \n", "555848 2019-04-02 00:03:00 119.6 64.9 56.0 \n", "555849 2019-04-02 00:04:00 119.3 65.5 54.6 \n", "... ... ... ... ... \n", "1080190 2020-02-14 23:57:00 121.9 67.3 52.8 \n", "1080191 2020-02-14 23:58:00 122.7 69.2 64.8 \n", "1080192 2020-02-14 23:58:00 122.7 69.2 64.8 \n", "1080193 2020-02-14 23:59:00 122.9 68.7 80.8 \n", "1080194 2020-02-14 23:59:00 122.9 68.7 80.8 \n", "\n", " rtu_003_ra_temp rtu_003_oa_temp rtu_003_ma_temp \\\n", "555845 72.2 59.2 66.7 \n", "555846 72.2 59.5 65.0 \n", "555847 72.1 59.4 64.0 \n", "555848 72.2 59.4 65.7 \n", "555849 72.0 59.2 67.1 \n", "... ... ... ... \n", "1080190 73.2 63.5 69.2 \n", "1080191 73.3 63.4 70.0 \n", "1080192 73.3 63.4 70.0 \n", "1080193 73.3 63.1 67.3 \n", "1080194 73.3 63.1 67.3 \n", "\n", " rtu_003_sf_vfd_spd_fbk_tn rtu_003_rf_vfd_spd_fbk_tn \\\n", "555845 79.6 53.7 \n", "555846 78.1 54.4 \n", "555847 78.0 60.1 \n", "555848 79.1 55.5 \n", "555849 75.2 53.1 \n", "... ... ... \n", "1080190 80.9 61.3 \n", "1080191 81.0 53.8 \n", "1080192 81.0 53.8 \n", "1080193 82.2 60.1 \n", "1080194 82.2 60.1 \n", "\n", " wifi_third_south wifi_fourth_south air_temp_set_1 air_temp_set_2 \\\n", "555845 34.0 31.0 15.67 14.92 \n", "555846 34.0 31.0 15.67 14.92 \n", "555847 34.0 31.0 15.67 14.92 \n", "555848 34.0 31.0 15.67 14.92 \n", "555849 34.0 31.0 15.67 14.92 \n", "... ... ... ... ... \n", "1080190 0.0 0.0 16.42 13.93 \n", "1080191 0.0 0.0 16.42 13.93 \n", "1080192 0.0 0.0 16.42 13.93 \n", "1080193 0.0 0.0 16.42 13.93 \n", "1080194 0.0 0.0 16.42 13.93 \n", "\n", " dew_point_temperature_set_1d relative_humidity_set_1 \\\n", "555845 11.77 77.80 \n", "555846 11.77 77.80 \n", "555847 11.77 77.80 \n", "555848 11.77 77.80 \n", "555849 11.77 77.80 \n", "... ... ... \n", "1080190 6.93 53.66 \n", "1080191 6.93 53.66 \n", "1080192 6.93 53.66 \n", "1080193 6.93 53.66 \n", "1080194 6.93 53.66 \n", "\n", " solar_radiation_set_1 \n", "555845 147.1 \n", "555846 147.1 \n", "555847 147.1 \n", "555848 147.1 \n", "555849 147.1 \n", "... ... 
\n", "1080190 347.9 \n", "1080191 347.9 \n", "1080192 347.9 \n", "1080193 347.9 \n", "1080194 347.9 \n", "\n", "[524350 rows x 16 columns]" ] }, "execution_count": 168, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_filtered" ] }, { "cell_type": "code", "execution_count": 170, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[]" ] }, "execution_count": 170, "metadata": {}, "output_type": "execute_result" } ], "source": [ "plt.plot(dd[\"hp_hws_temp\"])\n", "plt.plot(dg[\"hp_hws_temp\"])" ] }, { "cell_type": "code", "execution_count": 202, "metadata": {}, "outputs": [], "source": [ "from sklearn.mixture import GaussianMixture\n", "import numpy as np\n", "import matplotlib.pyplot as plt\n", "\n", "# Generating random data for demonstration\n", "np.random.seed(0)\n", "X = processed_data\n", "\n", "# Creating the GMM instance with desired number of clusters\n", "gmm = GaussianMixture(n_components=2)\n", "\n", "# Fitting the model to the data\n", "gmm.fit(X)\n", "\n", "# Getting the cluster labels\n", "labels = gmm.predict(X)\n", "\n", "# Plotting the data points with colors representing different clusters\n", "plt.scatter(X[:, 0], X[:, 1], c=labels, cmap='viridis', alpha=0.5)\n", "plt.title('GMM Clustering')\n", "plt.xlabel('Feature 1')\n", "plt.ylabel('Feature 2')\n", "plt.show()\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "smartbuilding", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.8" } }, "nbformat": 4, "nbformat_minor": 2 }