Spaces:

smartbuildings
/

smart-buildings

Sleeping

App Files Files Community

jerin commited on Apr 30, 2024

Commit

d81a75f

1 Parent(s): 81c7365

update lstm

Browse files

Files changed (1) hide show

lstm.ipynb +1153 -253

lstm.ipynb CHANGED Viewed

@@ -2,7 +2,7 @@
  "cells": [
   {
    "cell_type": "code",
-   "execution_count": 1,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -16,7 +16,7 @@
     "from keras.models import Sequential\n",
     "from keras.layers import LSTM, Dense\n",
     "from sklearn.model_selection import train_test_split\n",
-    "from sklearn.preprocessing import MinMaxScaler\n",
     "from keras.callbacks import ModelCheckpoint\n"
    ]
   },
@@ -463,7 +463,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 3,
    "metadata": {},
    "outputs": [
     {
@@ -502,7 +502,7 @@
        "dtype: int64"
       ]
      },
-     "execution_count": 3,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -782,7 +782,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 3,
    "metadata": {},
    "outputs": [
     {
@@ -807,24 +807,13 @@
        "    <tr style=\"text-align: right;\">\n",
        "      <th></th>\n",
        "      <th>date</th>\n",
-       "      <th>rtu_001_sat_sp_tn</th>\n",
-       "      <th>rtu_002_sat_sp_tn</th>\n",
-       "      <th>rtu_003_sat_sp_tn</th>\n",
-       "      <th>rtu_004_sat_sp_tn</th>\n",
-       "      <th>rtu_001_fltrd_sa_flow_tn</th>\n",
-       "      <th>rtu_002_fltrd_sa_flow_tn</th>\n",
-       "      <th>rtu_003_fltrd_sa_flow_tn</th>\n",
-       "      <th>rtu_004_fltrd_sa_flow_tn</th>\n",
-       "      <th>rtu_001_sa_temp</th>\n",
-       "      <th>...</th>\n",
-       "      <th>rtu_001_fltrd_gnd_lvl_plenum_press_tn</th>\n",
-       "      <th>rtu_002_fltrd_gnd_lvl_plenum_press_tn</th>\n",
-       "      <th>rtu_003_fltrd_gnd_lvl_plenum_press_tn</th>\n",
-       "      <th>rtu_004_fltrd_gnd_lvl_plenum_press_tn</th>\n",
-       "      <th>rtu_001_fltrd_lvl2_plenum_press_tn</th>\n",
-       "      <th>rtu_002_fltrd_lvl2_plenum_press_tn</th>\n",
-       "      <th>rtu_003_fltrd_lvl2_plenum_press_tn</th>\n",
-       "      <th>rtu_004_fltrd_lvl2_plenum_press_tn</th>\n",
        "      <th>hvac_N</th>\n",
        "      <th>hvac_S</th>\n",
        "    </tr>\n",
@@ -833,101 +822,48 @@
        "    <tr>\n",
        "      <th>0</th>\n",
        "      <td>2018-01-01 00:00:00</td>\n",
-       "      <td>68.0</td>\n",
-       "      <td>70.0</td>\n",
-       "      <td>65.0</td>\n",
-       "      <td>69.0</td>\n",
-       "      <td>14131.449</td>\n",
-       "      <td>13998.757</td>\n",
-       "      <td>13558.539</td>\n",
-       "      <td>9265.604</td>\n",
-       "      <td>67.6</td>\n",
-       "      <td>...</td>\n",
-       "      <td>0.030</td>\n",
-       "      <td>0.04</td>\n",
-       "      <td>0.04</td>\n",
-       "      <td>0.047</td>\n",
-       "      <td>0.050</td>\n",
-       "      <td>0.05</td>\n",
-       "      <td>0.05</td>\n",
-       "      <td>0.050</td>\n",
        "      <td>NaN</td>\n",
        "      <td>NaN</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>1</th>\n",
        "      <td>2018-01-01 00:01:00</td>\n",
-       "      <td>68.0</td>\n",
-       "      <td>70.0</td>\n",
-       "      <td>65.0</td>\n",
-       "      <td>69.0</td>\n",
-       "      <td>14164.429</td>\n",
-       "      <td>14065.259</td>\n",
-       "      <td>13592.909</td>\n",
-       "      <td>9265.604</td>\n",
-       "      <td>67.6</td>\n",
-       "      <td>...</td>\n",
-       "      <td>0.031</td>\n",
-       "      <td>0.04</td>\n",
-       "      <td>0.04</td>\n",
-       "      <td>0.043</td>\n",
-       "      <td>0.048</td>\n",
-       "      <td>0.05</td>\n",
-       "      <td>0.04</td>\n",
-       "      <td>0.046</td>\n",
        "      <td>NaN</td>\n",
        "      <td>NaN</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
        "</table>\n",
-       "<p>2 rows × 59 columns</p>\n",
        "</div>"
       ],
       "text/plain": [
-       "                  date  rtu_001_sat_sp_tn  rtu_002_sat_sp_tn  \\\n",
-       "0  2018-01-01 00:00:00               68.0               70.0   \n",
-       "1  2018-01-01 00:01:00               68.0               70.0   \n",
-       "\n",
-       "   rtu_003_sat_sp_tn  rtu_004_sat_sp_tn  rtu_001_fltrd_sa_flow_tn  \\\n",
-       "0               65.0               69.0                 14131.449   \n",
-       "1               65.0               69.0                 14164.429   \n",
-       "\n",
-       "   rtu_002_fltrd_sa_flow_tn  rtu_003_fltrd_sa_flow_tn  \\\n",
-       "0                 13998.757                 13558.539   \n",
-       "1                 14065.259                 13592.909   \n",
-       "\n",
-       "   rtu_004_fltrd_sa_flow_tn  rtu_001_sa_temp  ...  \\\n",
-       "0                  9265.604             67.6  ...   \n",
-       "1                  9265.604             67.6  ...   \n",
        "\n",
-       "   rtu_001_fltrd_gnd_lvl_plenum_press_tn  \\\n",
-       "0                                  0.030   \n",
-       "1                                  0.031   \n",
        "\n",
-       "   rtu_002_fltrd_gnd_lvl_plenum_press_tn  \\\n",
-       "0                                   0.04   \n",
-       "1                                   0.04   \n",
-       "\n",
-       "   rtu_003_fltrd_gnd_lvl_plenum_press_tn  \\\n",
-       "0                                   0.04   \n",
-       "1                                   0.04   \n",
-       "\n",
-       "   rtu_004_fltrd_gnd_lvl_plenum_press_tn  rtu_001_fltrd_lvl2_plenum_press_tn  \\\n",
-       "0                                  0.047                               0.050   \n",
-       "1                                  0.043                               0.048   \n",
-       "\n",
-       "   rtu_002_fltrd_lvl2_plenum_press_tn  rtu_003_fltrd_lvl2_plenum_press_tn  \\\n",
-       "0                                0.05                                0.05   \n",
-       "1                                0.05                                0.04   \n",
-       "\n",
-       "   rtu_004_fltrd_lvl2_plenum_press_tn  hvac_N  hvac_S  \n",
-       "0                               0.050     NaN     NaN  \n",
-       "1                               0.046     NaN     NaN  \n",
-       "\n",
-       "[2 rows x 59 columns]"
       ]
      },
-     "execution_count": 3,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -936,8 +872,11 @@
     "zone =  [\"18\", \"25\", \"26\", \"45\", \"48\", \"55\", \"56\", \"61\",\"16\", \"17\", \"21\", \"23\", \"24\", \"46\", \"47\", \"51\", \"52\", \"53\", \"54\"]\n",
     "rtu = [\"rtu_001\",\"rtu_002\",\"rtu_003\",\"rtu_004\"]\n",
     "wing = [\"hvac_N\",\"hvac_S\"]\n",
     "# any(sub in col for sub in zone) or\n",
-    "energy_data = merged[[\"date\"]+[col for col in merged.columns if any(sub in col for sub in wing) or any(sub in col for sub in rtu)]]\n",
     "df_filtered = energy_data[[col for col in energy_data.columns if 'Unnamed' not in col]]\n",
     "df_filtered = df_filtered[[col for col in df_filtered.columns if 'co2' not in col]]\n",
     "df_filtered = df_filtered[[col for col in df_filtered.columns if 'templogger' not in col]]\n",
@@ -947,7 +886,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 4,
    "metadata": {},
    "outputs": [
     {
@@ -968,7 +907,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
    "metadata": {},
    "outputs": [
     {
@@ -977,7 +916,7 @@
        "[]"
       ]
      },
-     "execution_count": 5,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -997,7 +936,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 6,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1011,90 +950,34 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 7,
    "metadata": {},
    "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "c:\\Users\\jerin\\anaconda3\\envs\\smartbuilding\\Lib\\site-packages\\keras\\src\\layers\\rnn\\rnn.py:205: UserWarning: Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead.\n",
-      "  super().__init__(**kwargs)\n"
-     ]
-    },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Epoch 1/15\n",
-      "\u001b[1m3220/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 30ms/step - loss: 0.0038\n",
-      "Epoch 1: val_loss improved from inf to 0.00894, saving model to lstm3.keras\n",
-      "\u001b[1m3220/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m144s\u001b[0m 44ms/step - loss: 0.0038 - val_loss: 0.0089\n",
-      "Epoch 2/15\n",
-      "\u001b[1m3220/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 32ms/step - loss: 5.4854e-04\n",
-      "Epoch 2: val_loss improved from 0.00894 to 0.00529, saving model to lstm3.keras\n",
-      "\u001b[1m3220/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m137s\u001b[0m 43ms/step - loss: 5.4854e-04 - val_loss: 0.0053\n",
-      "Epoch 3/15\n",
-      "\u001b[1m3219/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m━\u001b[0m \u001b[1m0s\u001b[0m 30ms/step - loss: 5.0405e-04\n",
-      "Epoch 3: val_loss did not improve from 0.00529\n",
-      "\u001b[1m3220/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m129s\u001b[0m 40ms/step - loss: 5.0405e-04 - val_loss: 0.0063\n",
-      "Epoch 4/15\n",
-      "\u001b[1m3219/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m━\u001b[0m \u001b[1m0s\u001b[0m 30ms/step - loss: 4.9573e-04\n",
-      "Epoch 4: val_loss did not improve from 0.00529\n",
-      "\u001b[1m3220/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m131s\u001b[0m 41ms/step - loss: 4.9572e-04 - val_loss: 0.0061\n",
-      "Epoch 5/15\n",
-      "\u001b[1m3219/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m━\u001b[0m \u001b[1m0s\u001b[0m 31ms/step - loss: 4.9666e-04\n",
-      "Epoch 5: val_loss did not improve from 0.00529\n",
-      "\u001b[1m3220/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m135s\u001b[0m 42ms/step - loss: 4.9665e-04 - val_loss: 0.0058\n",
-      "Epoch 6/15\n",
-      "\u001b[1m3219/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m━\u001b[0m \u001b[1m0s\u001b[0m 30ms/step - loss: 4.7853e-04\n",
-      "Epoch 6: val_loss improved from 0.00529 to 0.00512, saving model to lstm3.keras\n",
-      "\u001b[1m3220/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m129s\u001b[0m 40ms/step - loss: 4.7852e-04 - val_loss: 0.0051\n",
-      "Epoch 7/15\n",
-      "\u001b[1m3220/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 29ms/step - loss: 4.3858e-04\n",
-      "Epoch 7: val_loss improved from 0.00512 to 0.00386, saving model to lstm3.keras\n",
-      "\u001b[1m3220/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m129s\u001b[0m 40ms/step - loss: 4.3859e-04 - val_loss: 0.0039\n",
-      "Epoch 8/15\n",
-      "\u001b[1m3219/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m━\u001b[0m \u001b[1m0s\u001b[0m 30ms/step - loss: 4.4643e-04\n",
-      "Epoch 8: val_loss improved from 0.00386 to 0.00321, saving model to lstm3.keras\n",
-      "\u001b[1m3220/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m129s\u001b[0m 40ms/step - loss: 4.4643e-04 - val_loss: 0.0032\n",
-      "Epoch 9/15\n",
-      "\u001b[1m3219/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m━\u001b[0m \u001b[1m0s\u001b[0m 30ms/step - loss: 4.3562e-04\n",
-      "Epoch 9: val_loss improved from 0.00321 to 0.00267, saving model to lstm3.keras\n",
-      "\u001b[1m3220/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m130s\u001b[0m 40ms/step - loss: 4.3562e-04 - val_loss: 0.0027\n",
-      "Epoch 10/15\n",
-      "\u001b[1m3219/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m━\u001b[0m \u001b[1m0s\u001b[0m 30ms/step - loss: 4.3336e-04\n",
-      "Epoch 10: val_loss did not improve from 0.00267\n",
-      "\u001b[1m3220/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━���━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m130s\u001b[0m 40ms/step - loss: 4.3336e-04 - val_loss: 0.0029\n",
-      "Epoch 11/15\n",
-      "\u001b[1m3219/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m━\u001b[0m \u001b[1m0s\u001b[0m 30ms/step - loss: 4.2932e-04\n",
-      "Epoch 11: val_loss did not improve from 0.00267\n",
-      "\u001b[1m3220/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m130s\u001b[0m 40ms/step - loss: 4.2932e-04 - val_loss: 0.0032\n",
-      "Epoch 12/15\n",
-      "\u001b[1m3220/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 30ms/step - loss: 4.1954e-04\n",
-      "Epoch 12: val_loss improved from 0.00267 to 0.00248, saving model to lstm3.keras\n",
-      "\u001b[1m3220/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m128s\u001b[0m 40ms/step - loss: 4.1954e-04 - val_loss: 0.0025\n",
-      "Epoch 13/15\n",
-      "\u001b[1m3220/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 29ms/step - loss: 4.2671e-04\n",
-      "Epoch 13: val_loss improved from 0.00248 to 0.00245, saving model to lstm3.keras\n",
-      "\u001b[1m3220/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m130s\u001b[0m 40ms/step - loss: 4.2671e-04 - val_loss: 0.0024\n",
-      "Epoch 14/15\n",
-      "\u001b[1m3219/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m━\u001b[0m \u001b[1m0s\u001b[0m 30ms/step - loss: 4.1718e-04\n",
-      "Epoch 14: val_loss did not improve from 0.00245\n",
-      "\u001b[1m3220/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m129s\u001b[0m 40ms/step - loss: 4.1717e-04 - val_loss: 0.0031\n",
-      "Epoch 15/15\n",
-      "\u001b[1m3219/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m━\u001b[0m \u001b[1m0s\u001b[0m 30ms/step - loss: 4.0550e-04\n",
-      "Epoch 15: val_loss did not improve from 0.00245\n",
-      "\u001b[1m3220/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m129s\u001b[0m 40ms/step - loss: 4.0550e-04 - val_loss: 0.0025\n"
      ]
     },
     {
      "data": {
       "text/plain": [
-       "<keras.src.callbacks.history.History at 0x1fc4b1aecd0>"
       ]
      },
-     "execution_count": 7,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -1103,17 +986,17 @@
     "train,test = traindataset,testdataset\n",
     "\n",
     "def create_dataset(dataset,time_step):\n",
-    "    x = [[] for _ in range(58)] \n",
     "    Y = []\n",
     "    for i in range(len(dataset) - time_step - 1):\n",
-    "        for j in range(58):\n",
     "            x[j].append(dataset[i:(i + time_step), j])\n",
-    "        Y.append([dataset[i + time_step, 56],dataset[i + time_step, 57]])\n",
     "    x= [np.array(feature_list) for feature_list in x]\n",
     "    Y = np.reshape(Y,(len(Y),2))\n",
     "    return np.stack(x,axis=2),Y\n",
     "\n",
-    "time_step = 60\n",
     "X_train, y_train = create_dataset(train, time_step)\n",
     "X_test, y_test = create_dataset(test, time_step)\n",
     "\n",
@@ -1121,93 +1004,36 @@
     "model = Sequential()\n",
     "model.add(LSTM(units=50, return_sequences=True, input_shape=(X_train.shape[1], X_train.shape[2])))\n",
     "model.add(LSTM(units=50, return_sequences=True))\n",
-    "model.add(LSTM(units=50))\n",
     "model.add(Dense(units=2))\n",
     "\n",
     "model.compile(optimizer='adam', loss='mean_squared_error')\n",
     "\n",
-    "checkpoint_path = \"lstm3.keras\"\n",
     "checkpoint_callback = ModelCheckpoint(filepath=checkpoint_path, monitor='val_loss', verbose=1, save_best_only=True, mode='min')\n",
-    "model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=15, batch_size=64, verbose=1, callbacks=[checkpoint_callback])\n"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 39,
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 1/10\n",
-      "\u001b[1m3218/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m━\u001b[0m \u001b[1m0s\u001b[0m 15ms/step - loss: 0.0050\n",
-      "Epoch 1: val_loss improved from inf to 0.03991, saving model to lstm3.keras\n",
-      "\u001b[1m3220/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m68s\u001b[0m 21ms/step - loss: 0.0050 - val_loss: 0.0399\n",
-      "Epoch 2/10\n",
-      "\u001b[1m3219/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m━\u001b[0m \u001b[1m0s\u001b[0m 15ms/step - loss: 0.0050\n",
-      "Epoch 2: val_loss did not improve from 0.03991\n",
-      "\u001b[1m3220/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m67s\u001b[0m 21ms/step - loss: 0.0050 - val_loss: 0.0480\n",
-      "Epoch 3/10\n",
-      "\u001b[1m3219/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m━\u001b[0m \u001b[1m0s\u001b[0m 15ms/step - loss: 0.0048\n",
-      "Epoch 3: val_loss did not improve from 0.03991\n",
-      "\u001b[1m3220/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m67s\u001b[0m 21ms/step - loss: 0.0048 - val_loss: 0.0474\n",
-      "Epoch 4/10\n",
-      "\u001b[1m3220/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 15ms/step - loss: 0.0047\n",
-      "Epoch 4: val_loss did not improve from 0.03991\n",
-      "\u001b[1m3220/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m68s\u001b[0m 21ms/step - loss: 0.0047 - val_loss: 0.0492\n",
-      "Epoch 5/10\n",
-      "\u001b[1m3217/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m━\u001b[0m \u001b[1m0s\u001b[0m 15ms/step - loss: 0.0048\n",
-      "Epoch 5: val_loss improved from 0.03991 to 0.03753, saving model to lstm3.keras\n",
-      "\u001b[1m3220/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m68s\u001b[0m 21ms/step - loss: 0.0048 - val_loss: 0.0375\n",
-      "Epoch 6/10\n",
-      "\u001b[1m3219/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m━\u001b[0m \u001b[1m0s\u001b[0m 15ms/step - loss: 0.0046\n",
-      "Epoch 6: val_loss did not improve from 0.03753\n",
-      "\u001b[1m3220/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m68s\u001b[0m 21ms/step - loss: 0.0046 - val_loss: 0.0466\n",
-      "Epoch 7/10\n",
-      "\u001b[1m3219/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m━\u001b[0m \u001b[1m0s\u001b[0m 15ms/step - loss: 0.0043\n",
-      "Epoch 7: val_loss did not improve from 0.03753\n",
-      "\u001b[1m3220/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m67s\u001b[0m 21ms/step - loss: 0.0043 - val_loss: 0.0499\n",
-      "Epoch 8/10\n",
-      "\u001b[1m3219/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m━\u001b[0m \u001b[1m0s\u001b[0m 15ms/step - loss: 0.0043\n",
-      "Epoch 8: val_loss did not improve from 0.03753\n",
-      "\u001b[1m3220/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m68s\u001b[0m 21ms/step - loss: 0.0043 - val_loss: 0.0483\n",
-      "Epoch 9/10\n",
-      "\u001b[1m3218/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m━\u001b[0m \u001b[1m0s\u001b[0m 15ms/step - loss: 0.0042\n",
-      "Epoch 9: val_loss did not improve from 0.03753\n",
-      "\u001b[1m3220/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m69s\u001b[0m 22ms/step - loss: 0.0042 - val_loss: 0.0559\n",
-      "Epoch 10/10\n",
-      "\u001b[1m3218/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m━\u001b[0m \u001b[1m0s\u001b[0m 15ms/step - loss: 0.0044\n",
-      "Epoch 10: val_loss did not improve from 0.03753\n",
-      "\u001b[1m3220/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m68s\u001b[0m 21ms/step - loss: 0.0044 - val_loss: 0.0470\n"
-     ]
-    },
-    {
-     "data": {
-      "text/plain": [
-       "<keras.src.callbacks.history.History at 0x153b37086d0>"
-      ]
-     },
-     "execution_count": 39,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
    "source": [
-    "checkpoint_callback = ModelCheckpoint(filepath=checkpoint_path, monitor='val_loss', verbose=1, save_best_only=True, mode='min')\n",
-    "model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=10, batch_size=64, verbose=1, callbacks=[checkpoint_callback])"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 8,
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\u001b[1m6344/6344\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m47s\u001b[0m 7ms/step\n"
      ]
     }
    ],
@@ -1218,14 +1044,14 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 10,
    "metadata": {},
    "outputs": [],
    "source": [
     "%matplotlib qt\n",
-    "var = 0\n",
-    "plt.plot(testdataset_df['date'][61:],y_test[:,0], label='Original Testing Data', color='blue')\n",
-    "plt.plot(testdataset_df['date'][61:],test_predict1[:,0], label='Predicted Testing Data', color='red',alpha=0.8)\n",
     "# anomalies = np.where(abs(test_predict[:,var] - y_test[:,var]) > 0.38)[0]\n",
     "# plt.scatter(anomalies,test_predict[anomalies,var], color='black',marker =\"o\",s=100 )\n",
     "\n",
@@ -1239,7 +1065,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 11,
    "metadata": {},
    "outputs": [
     {
@@ -1251,8 +1077,8 @@
     }
    ],
    "source": [
-    "from tensorflow.keras.models import load_model\n",
-    "model.save(\"energy_model.h5\") "
    ]
   },
   {
@@ -1278,6 +1104,1080 @@
     "plt.plot(df_filtered['date'],df_filtered['zone_025_temp'])"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,

  "cells": [
   {
    "cell_type": "code",
+   "execution_count": 181,
    "metadata": {},
    "outputs": [],
    "source": [
     "from keras.models import Sequential\n",
     "from keras.layers import LSTM, Dense\n",
     "from sklearn.model_selection import train_test_split\n",
+    "from sklearn.preprocessing import MinMaxScaler,StandardScaler\n",
     "from keras.callbacks import ModelCheckpoint\n"
    ]
   },
   },
   {
    "cell_type": "code",
+   "execution_count": 7,
    "metadata": {},
    "outputs": [
     {
        "dtype: int64"
       ]
      },
+     "execution_count": 7,
      "metadata": {},
      "output_type": "execute_result"
     }
   },
   {
    "cell_type": "code",
+   "execution_count": 19,
    "metadata": {},
    "outputs": [
     {
        "    <tr style=\"text-align: right;\">\n",
        "      <th></th>\n",
        "      <th>date</th>\n",
+       "      <th>air_temp_set_1</th>\n",
+       "      <th>air_temp_set_2</th>\n",
+       "      <th>dew_point_temperature_set_1d</th>\n",
+       "      <th>relative_humidity_set_1</th>\n",
+       "      <th>solar_radiation_set_1</th>\n",
+       "      <th>wifi_third_south</th>\n",
+       "      <th>wifi_fourth_south</th>\n",
        "      <th>hvac_N</th>\n",
        "      <th>hvac_S</th>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>0</th>\n",
        "      <td>2018-01-01 00:00:00</td>\n",
+       "      <td>11.64</td>\n",
+       "      <td>11.51</td>\n",
+       "      <td>8.1</td>\n",
+       "      <td>79.07</td>\n",
+       "      <td>86.7</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
        "      <td>NaN</td>\n",
        "      <td>NaN</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>1</th>\n",
        "      <td>2018-01-01 00:01:00</td>\n",
+       "      <td>11.64</td>\n",
+       "      <td>11.51</td>\n",
+       "      <td>8.1</td>\n",
+       "      <td>79.07</td>\n",
+       "      <td>86.7</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
        "      <td>NaN</td>\n",
        "      <td>NaN</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
        "</table>\n",
        "</div>"
       ],
       "text/plain": [
+       "                  date  air_temp_set_1  air_temp_set_2  \\\n",
+       "0  2018-01-01 00:00:00           11.64           11.51   \n",
+       "1  2018-01-01 00:01:00           11.64           11.51   \n",
        "\n",
+       "   dew_point_temperature_set_1d  relative_humidity_set_1  \\\n",
+       "0                           8.1                    79.07   \n",
+       "1                           8.1                    79.07   \n",
        "\n",
+       "   solar_radiation_set_1  wifi_third_south  wifi_fourth_south  hvac_N  hvac_S  \n",
+       "0                   86.7               NaN                NaN     NaN     NaN  \n",
+       "1                   86.7               NaN                NaN     NaN     NaN  "
       ]
      },
+     "execution_count": 19,
      "metadata": {},
      "output_type": "execute_result"
     }
     "zone =  [\"18\", \"25\", \"26\", \"45\", \"48\", \"55\", \"56\", \"61\",\"16\", \"17\", \"21\", \"23\", \"24\", \"46\", \"47\", \"51\", \"52\", \"53\", \"54\"]\n",
     "rtu = [\"rtu_001\",\"rtu_002\",\"rtu_003\",\"rtu_004\"]\n",
     "wing = [\"hvac_N\",\"hvac_S\"]\n",
+    "env = [\"air_temp_set_1\",\"air_temp_set_2\",\"dew_point_temperature_set_1d\",\"relative_humidity_set_1\",\"solar_radiation_set_1\"]\n",
+    "wifi=[\"wifi_third_south\",\"wifi_fourth_south\"]\n",
     "# any(sub in col for sub in zone) or\n",
+    "energy_data = merged[[\"date\"]+[col for col in merged.columns if \n",
+    "                               any(sub in col for sub in env) or any(sub in col for sub in wifi)]+wing]\n",
     "df_filtered = energy_data[[col for col in energy_data.columns if 'Unnamed' not in col]]\n",
     "df_filtered = df_filtered[[col for col in df_filtered.columns if 'co2' not in col]]\n",
     "df_filtered = df_filtered[[col for col in df_filtered.columns if 'templogger' not in col]]\n",
   },
   {
    "cell_type": "code",
+   "execution_count": 20,
    "metadata": {},
    "outputs": [
     {
   },
   {
    "cell_type": "code",
+   "execution_count": 21,
    "metadata": {},
    "outputs": [
     {
        "[]"
       ]
      },
+     "execution_count": 21,
      "metadata": {},
      "output_type": "execute_result"
     }
   },
   {
    "cell_type": "code",
+   "execution_count": 22,
    "metadata": {},
    "outputs": [],
    "source": [
   },
   {
    "cell_type": "code",
+   "execution_count": 26,
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
+      "Epoch 1/3\n",
+      "\u001b[1m3219/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m━\u001b[0m \u001b[1m0s\u001b[0m 14ms/step - loss: 0.0036\n",
+      "Epoch 1: val_loss improved from inf to 0.00068, saving model to lstm_energy_01.keras\n",
+      "\u001b[1m3220/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m65s\u001b[0m 20ms/step - loss: 0.0036 - val_loss: 6.8049e-04\n",
+      "Epoch 2/3\n",
+      "\u001b[1m3219/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m━\u001b[0m \u001b[1m0s\u001b[0m 14ms/step - loss: 4.8055e-04\n",
+      "Epoch 2: val_loss improved from 0.00068 to 0.00064, saving model to lstm_energy_01.keras\n",
+      "\u001b[1m3220/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m60s\u001b[0m 19ms/step - loss: 4.8055e-04 - val_loss: 6.4225e-04\n",
+      "Epoch 3/3\n",
+      "\u001b[1m3219/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m━\u001b[0m \u001b[1m0s\u001b[0m 14ms/step - loss: 4.6623e-04\n",
+      "Epoch 3: val_loss improved from 0.00064 to 0.00061, saving model to lstm_energy_01.keras\n",
+      "\u001b[1m3220/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m61s\u001b[0m 19ms/step - loss: 4.6622e-04 - val_loss: 6.0579e-04\n"
      ]
     },
     {
      "data": {
       "text/plain": [
+       "<keras.src.callbacks.history.History at 0x1ea4d2ed650>"
       ]
      },
+     "execution_count": 26,
      "metadata": {},
      "output_type": "execute_result"
     }
     "train,test = traindataset,testdataset\n",
     "\n",
     "def create_dataset(dataset,time_step):\n",
+    "    x = [[] for _ in range(9)] \n",
     "    Y = []\n",
     "    for i in range(len(dataset) - time_step - 1):\n",
+    "        for j in range(9):\n",
     "            x[j].append(dataset[i:(i + time_step), j])\n",
+    "        Y.append([dataset[i + time_step, 7],dataset[i + time_step, 8]])\n",
     "    x= [np.array(feature_list) for feature_list in x]\n",
     "    Y = np.reshape(Y,(len(Y),2))\n",
     "    return np.stack(x,axis=2),Y\n",
     "\n",
+    "time_step = 30\n",
     "X_train, y_train = create_dataset(train, time_step)\n",
     "X_test, y_test = create_dataset(test, time_step)\n",
     "\n",
     "model = Sequential()\n",
     "model.add(LSTM(units=50, return_sequences=True, input_shape=(X_train.shape[1], X_train.shape[2])))\n",
     "model.add(LSTM(units=50, return_sequences=True))\n",
+    "model.add(LSTM(units=30))\n",
     "model.add(Dense(units=2))\n",
     "\n",
     "model.compile(optimizer='adam', loss='mean_squared_error')\n",
     "\n",
+    "checkpoint_path = \"lstm_energy_01.keras\"\n",
     "checkpoint_callback = ModelCheckpoint(filepath=checkpoint_path, monitor='val_loss', verbose=1, save_best_only=True, mode='min')\n",
+    "model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=3, batch_size=64, verbose=1, callbacks=[checkpoint_callback])\n"
    ]
   },
   {
    "cell_type": "code",
+   "execution_count": 18,
    "metadata": {},
+   "outputs": [],
    "source": [
+    "# checkpoint_callback = ModelCheckpoint(filepath=checkpoint_path, monitor='val_loss', verbose=1, save_best_only=True, mode='min')\n",
+    "# model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=10, batch_size=64, verbose=1, callbacks=[checkpoint_callback])"
    ]
   },
   {
    "cell_type": "code",
+   "execution_count": 27,
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
+      "\u001b[1m6345/6345\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m22s\u001b[0m 3ms/step\n"
      ]
     }
    ],
   },
   {
    "cell_type": "code",
+   "execution_count": 30,
    "metadata": {},
    "outputs": [],
    "source": [
     "%matplotlib qt\n",
+    "var = 1\n",
+    "plt.plot(testdataset_df['date'][31:],y_test[:,1], label='Original Testing Data', color='blue')\n",
+    "plt.plot(testdataset_df['date'][31:],test_predict1[:,1], label='Predicted Testing Data', color='red',alpha=0.8)\n",
     "# anomalies = np.where(abs(test_predict[:,var] - y_test[:,var]) > 0.38)[0]\n",
     "# plt.scatter(anomalies,test_predict[anomalies,var], color='black',marker =\"o\",s=100 )\n",
     "\n",
   },
   {
    "cell_type": "code",
+   "execution_count": 31,
    "metadata": {},
    "outputs": [
     {
     }
    ],
    "source": [
+    "# from tensorflow.keras.models import load_model\n",
+    "# model.save(\"energy_model_01.h5\") "
    ]
   },
   {
     "plt.plot(df_filtered['date'],df_filtered['zone_025_temp'])"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": 22,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "[<matplotlib.lines.Line2D at 0x1fe7e211d90>]"
+      ]
+     },
+     "execution_count": 22,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "plt.plot(merged['hvac_S'])\n",
+    "plt.plot(testdataset_df['hvac_S'])\n",
+    "plt.plot(traindataset_df['hvac_S'],'r')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 24,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "[<matplotlib.lines.Line2D at 0x1fe8ecf5bd0>]"
+      ]
+     },
+     "execution_count": 24,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "plt.plot(merged['hvac_N'])\n",
+    "plt.plot(testdataset_df['hvac_N'])\n",
+    "plt.plot(traindataset_df['hvac_N'],'r')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 32,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# merged.columns.to_list()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "2.16.1\n"
+     ]
+    }
+   ],
+   "source": []
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## LSTM 2.0: model restricted to RTU-003 signals"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 39,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>date</th>\n",
+       "      <th>hp_hws_temp</th>\n",
+       "      <th>rtu_003_sat_sp_tn</th>\n",
+       "      <th>rtu_003_fltrd_sa_flow_tn</th>\n",
+       "      <th>rtu_003_sa_temp</th>\n",
+       "      <th>rtu_003_pa_static_stpt_tn</th>\n",
+       "      <th>rtu_003_oa_flow_tn</th>\n",
+       "      <th>rtu_003_oadmpr_pct</th>\n",
+       "      <th>rtu_003_econ_stpt_tn</th>\n",
+       "      <th>rtu_003_ra_temp</th>\n",
+       "      <th>...</th>\n",
+       "      <th>rtu_003_rf_vfd_spd_fbk_tn</th>\n",
+       "      <th>rtu_003_fltrd_gnd_lvl_plenum_press_tn</th>\n",
+       "      <th>rtu_003_fltrd_lvl2_plenum_press_tn</th>\n",
+       "      <th>wifi_third_south</th>\n",
+       "      <th>wifi_fourth_south</th>\n",
+       "      <th>air_temp_set_1</th>\n",
+       "      <th>air_temp_set_2</th>\n",
+       "      <th>dew_point_temperature_set_1d</th>\n",
+       "      <th>relative_humidity_set_1</th>\n",
+       "      <th>solar_radiation_set_1</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>2018-01-01 00:00:00</td>\n",
+       "      <td>75.3</td>\n",
+       "      <td>65.0</td>\n",
+       "      <td>13558.539</td>\n",
+       "      <td>65.5</td>\n",
+       "      <td>0.6</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>34.6</td>\n",
+       "      <td>65.0</td>\n",
+       "      <td>67.9</td>\n",
+       "      <td>...</td>\n",
+       "      <td>49.9</td>\n",
+       "      <td>0.04</td>\n",
+       "      <td>0.05</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>11.64</td>\n",
+       "      <td>11.51</td>\n",
+       "      <td>8.1</td>\n",
+       "      <td>79.07</td>\n",
+       "      <td>86.7</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>2018-01-01 00:01:00</td>\n",
+       "      <td>75.3</td>\n",
+       "      <td>65.0</td>\n",
+       "      <td>13592.909</td>\n",
+       "      <td>65.6</td>\n",
+       "      <td>0.6</td>\n",
+       "      <td>5992.059572</td>\n",
+       "      <td>34.6</td>\n",
+       "      <td>65.0</td>\n",
+       "      <td>67.9</td>\n",
+       "      <td>...</td>\n",
+       "      <td>49.4</td>\n",
+       "      <td>0.04</td>\n",
+       "      <td>0.04</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>11.64</td>\n",
+       "      <td>11.51</td>\n",
+       "      <td>8.1</td>\n",
+       "      <td>79.07</td>\n",
+       "      <td>86.7</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "<p>2 rows × 23 columns</p>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "                  date  hp_hws_temp  rtu_003_sat_sp_tn  \\\n",
+       "0  2018-01-01 00:00:00         75.3               65.0   \n",
+       "1  2018-01-01 00:01:00         75.3               65.0   \n",
+       "\n",
+       "   rtu_003_fltrd_sa_flow_tn  rtu_003_sa_temp  rtu_003_pa_static_stpt_tn  \\\n",
+       "0                 13558.539             65.5                        0.6   \n",
+       "1                 13592.909             65.6                        0.6   \n",
+       "\n",
+       "   rtu_003_oa_flow_tn  rtu_003_oadmpr_pct  rtu_003_econ_stpt_tn  \\\n",
+       "0            0.000000                34.6                  65.0   \n",
+       "1         5992.059572                34.6                  65.0   \n",
+       "\n",
+       "   rtu_003_ra_temp  ...  rtu_003_rf_vfd_spd_fbk_tn  \\\n",
+       "0             67.9  ...                       49.9   \n",
+       "1             67.9  ...                       49.4   \n",
+       "\n",
+       "   rtu_003_fltrd_gnd_lvl_plenum_press_tn  rtu_003_fltrd_lvl2_plenum_press_tn  \\\n",
+       "0                                   0.04                                0.05   \n",
+       "1                                   0.04                                0.04   \n",
+       "\n",
+       "   wifi_third_south  wifi_fourth_south  air_temp_set_1  air_temp_set_2  \\\n",
+       "0               NaN                NaN           11.64           11.51   \n",
+       "1               NaN                NaN           11.64           11.51   \n",
+       "\n",
+       "   dew_point_temperature_set_1d  relative_humidity_set_1  \\\n",
+       "0                           8.1                    79.07   \n",
+       "1                           8.1                    79.07   \n",
+       "\n",
+       "   solar_radiation_set_1  \n",
+       "0                   86.7  \n",
+       "1                   86.7  \n",
+       "\n",
+       "[2 rows x 23 columns]"
+      ]
+     },
+     "execution_count": 39,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Column groups used to assemble the RTU-003 feature frame.\n",
+    "rtu = [\"rtu_003\"]\n",
+    "env = [\"air_temp_set_1\",\"air_temp_set_2\",\"dew_point_temperature_set_1d\",\"relative_humidity_set_1\",\"solar_radiation_set_1\"]\n",
+    "wifi = [\"wifi_third_south\",\"wifi_fourth_south\"]\n",
+    "# Any column whose name contains one of these substrings is dropped.\n",
+    "excluded_substrings = (\"Unnamed\", \"co2\", \"templogger\")\n",
+    "\n",
+    "# Keep every column whose name mentions the selected RTU or one of the wifi columns.\n",
+    "sensor_columns = [col for col in merged.columns if any(sub in col for sub in rtu + wifi)]\n",
+    "energy_data = merged[[\"date\",\"hp_hws_temp\"] + sensor_columns + env]\n",
+    "df_filtered = energy_data[[col for col in energy_data.columns\n",
+    "                           if not any(sub in col for sub in excluded_substrings)]]\n",
+    "# Rows containing NaN are intentionally kept at this stage (no dropna here).\n",
+    "df_filtered.head(2)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 40,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Parse the timestamp column, then keep only rows whose calendar day lies strictly\n",
+    "# between 2019-04-01 and 2020-02-15.\n",
+    "df_filtered['date'] = pd.to_datetime(df_filtered['date'], format=\"%Y-%m-%d %H:%M:%S\")\n",
+    "sample_day = df_filtered['date'].dt.date\n",
+    "in_window = (sample_day > date(2019, 4, 1)) & (sample_day < date(2020, 2, 15))\n",
+    "df_filtered = df_filtered[in_window]\n",
+    "\n",
+    "# Warn if any column of the retained rows still has missing values.\n",
+    "has_missing = df_filtered.isna().any().any()\n",
+    "if has_missing:\n",
+    "    print(\"There are NA values in the DataFrame columns.\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 68,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Columns retained for modelling. The order matters: later cells address\n",
+    "# features and targets by column position.\n",
+    "selected_columns = [\n",
+    "    'date',\n",
+    "    'hp_hws_temp',\n",
+    "    'rtu_003_sa_temp',\n",
+    "    'rtu_003_oadmpr_pct',\n",
+    "    'rtu_003_ra_temp',\n",
+    "    'rtu_003_oa_temp',\n",
+    "    'rtu_003_ma_temp',\n",
+    "    'rtu_003_sf_vfd_spd_fbk_tn',\n",
+    "    'rtu_003_rf_vfd_spd_fbk_tn',\n",
+    "    'wifi_third_south',\n",
+    "    'wifi_fourth_south',\n",
+    "    'air_temp_set_1',\n",
+    "    'air_temp_set_2',\n",
+    "    'dew_point_temperature_set_1d',\n",
+    "    'relative_humidity_set_1',\n",
+    "    'solar_radiation_set_1',\n",
+    "]\n",
+    "df_filtered = df_filtered[selected_columns]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 188,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "[]"
+      ]
+     },
+     "execution_count": 188,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Split by calendar day: rows before 2019-08-21 form the test frame and rows\n",
+    "# after 2019-11-08 form the training frame.\n",
+    "split_day = df_filtered.date.dt.date\n",
+    "testdataset_df = df_filtered[split_day < date(2019, 8, 21)]\n",
+    "traindataset_df = df_filtered[split_day > date(2019, 11, 8)]\n",
+    "\n",
+    "# Plain value arrays without the timestamp column.\n",
+    "# (Optional smoothing that was tried on the frames: .ewm(com = 1000,adjust=True).mean())\n",
+    "testdataset = testdataset_df.drop(\"date\", axis=1).values\n",
+    "traindataset = traindataset_df.drop(\"date\", axis=1).values\n",
+    "\n",
+    "# Names of the training columns that contain at least one NaN.\n",
+    "train_has_na = traindataset_df.isna().any()\n",
+    "columns_with_na = train_has_na[train_has_na].index.tolist()\n",
+    "columns_with_na"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 189,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Standardise with statistics learned from the training array only, then apply\n",
+    "# the same transform to the test array. Both are cast to float32 first.\n",
+    "scaler = StandardScaler()\n",
+    "traindataset = scaler.fit_transform(traindataset.astype('float32'))\n",
+    "testdataset = scaler.transform(testdataset.astype('float32'))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 191,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Epoch 1/5\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "c:\\Users\\jerin\\anaconda3\\envs\\smartbuilding\\Lib\\site-packages\\keras\\src\\layers\\rnn\\rnn.py:205: UserWarning: Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead.\n",
+      "  super().__init__(**kwargs)\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\u001b[1m3217/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m━\u001b[0m \u001b[1m0s\u001b[0m 14ms/step - loss: 0.1413\n",
+      "Epoch 1: val_loss improved from inf to 0.52256, saving model to lstm_smooth_01.keras\n",
+      "\u001b[1m3220/3220\u001b[0m \u001b[32m━━━━━━━━��━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m66s\u001b[0m 20ms/step - loss: 0.1412 - val_loss: 0.5226\n",
+      "Epoch 2/5\n",
+      "\u001b[1m3217/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m━\u001b[0m \u001b[1m0s\u001b[0m 15ms/step - loss: 0.0393\n",
+      "Epoch 2: val_loss improved from 0.52256 to 0.50228, saving model to lstm_smooth_01.keras\n",
+      "\u001b[1m3220/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m65s\u001b[0m 20ms/step - loss: 0.0393 - val_loss: 0.5023\n",
+      "Epoch 3/5\n",
+      "\u001b[1m3220/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 15ms/step - loss: 0.0347\n",
+      "Epoch 3: val_loss improved from 0.50228 to 0.48711, saving model to lstm_smooth_01.keras\n",
+      "\u001b[1m3220/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m65s\u001b[0m 20ms/step - loss: 0.0347 - val_loss: 0.4871\n",
+      "Epoch 4/5\n",
+      "\u001b[1m3217/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m━\u001b[0m \u001b[1m0s\u001b[0m 15ms/step - loss: 0.0319\n",
+      "Epoch 4: val_loss did not improve from 0.48711\n",
+      "\u001b[1m3220/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m66s\u001b[0m 20ms/step - loss: 0.0319 - val_loss: 0.4958\n",
+      "Epoch 5/5\n",
+      "\u001b[1m3218/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m━\u001b[0m \u001b[1m0s\u001b[0m 15ms/step - loss: 0.0303\n",
+      "Epoch 5: val_loss did not improve from 0.48711\n",
+      "\u001b[1m3220/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m67s\u001b[0m 21ms/step - loss: 0.0303 - val_loss: 0.5026\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "<keras.src.callbacks.history.History at 0x1d460bb83d0>"
+      ]
+     },
+     "execution_count": 191,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "train,test = traindataset,testdataset\n",
+    "\n",
+    "def create_dataset(dataset, time_step, n_targets=8):\n",
+    "    \"\"\"Slice a 2-D series into sliding windows for next-step prediction.\n",
+    "\n",
+    "    Args:\n",
+    "        dataset: array of shape (n_rows, n_features), rows in time order.\n",
+    "        time_step: number of consecutive rows in each input window.\n",
+    "        n_targets: how many leading columns of the row that follows each\n",
+    "            window are used as the prediction target.\n",
+    "\n",
+    "    Returns:\n",
+    "        (X, Y) where X has shape (n_samples, time_step, n_features) and\n",
+    "        Y has shape (n_samples, n_targets), with\n",
+    "        n_samples = len(dataset) - time_step - 1.\n",
+    "    \"\"\"\n",
+    "    dataset = np.asarray(dataset)\n",
+    "    n_features = dataset.shape[1]\n",
+    "    # The trailing -1 skips the last usable window; it is kept so the number of\n",
+    "    # samples does not change for code that slices dates to the same length.\n",
+    "    n_samples = len(dataset) - time_step - 1\n",
+    "    if n_samples <= 0:\n",
+    "        # Series too short for even one window: return correctly shaped empties.\n",
+    "        empty_x = np.empty((0, time_step, n_features), dtype=dataset.dtype)\n",
+    "        return empty_x, np.empty((0, n_targets), dtype=dataset.dtype)\n",
+    "    X = np.stack([dataset[i:i + time_step] for i in range(n_samples)])\n",
+    "    # Target for window i is row i + time_step, restricted to the first n_targets columns.\n",
+    "    Y = dataset[time_step:time_step + n_samples, :n_targets].copy()\n",
+    "    return X, Y\n",
+    "\n",
+    "time_step = 30\n",
+    "X_train, y_train = create_dataset(train, time_step)\n",
+    "X_test, y_test = create_dataset(test, time_step)\n",
+    "\n",
+    "\n",
+    "# Three stacked LSTM layers followed by a dense head with one unit per target.\n",
+    "model = Sequential()\n",
+    "model.add(LSTM(units=50, return_sequences=True, input_shape=(X_train.shape[1], X_train.shape[2])))\n",
+    "model.add(LSTM(units=50, return_sequences=True))\n",
+    "model.add(LSTM(units=30))\n",
+    "model.add(Dense(units=y_train.shape[1]))\n",
+    "\n",
+    "model.compile(optimizer='adam', loss='mean_squared_error')\n",
+    "\n",
+    "# NOTE: the checkpoint keeps the epoch with the lowest val_loss, and val_loss is\n",
+    "# computed on the test windows passed as validation_data.\n",
+    "checkpoint_path = \"lstm_smooth_01.keras\"\n",
+    "checkpoint_callback = ModelCheckpoint(filepath=checkpoint_path, monitor='val_loss', verbose=1, save_best_only=True, mode='min')\n",
+    "model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=5, batch_size=64, verbose=1, callbacks=[checkpoint_callback])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 192,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\u001b[1m6345/6345\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m23s\u001b[0m 4ms/step\n"
+     ]
+    }
+   ],
+   "source": [
+    "test_predict1 = model.predict(X_test)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 193,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%matplotlib qt\n",
+    "var = 0  # index of the target column to inspect\n",
+    "ANOMALY_THRESHOLD = 0.38  # absolute residual (on the scaled data) above which a sample is flagged\n",
+    "\n",
+    "plt.plot(y_test[:,var], label='Original Testing Data', color='blue')\n",
+    "plt.plot(test_predict1[:,var], label='Predicted Testing Data', color='red',alpha=0.8)\n",
+    "# np.where on a 1-D mask returns a 1-tuple, so the sample indices are always element 0.\n",
+    "# Indexing that tuple with `var` only worked while var == 0 and raised IndexError otherwise.\n",
+    "anomalies = np.where(np.abs(test_predict1[:,var] - y_test[:,var]) > ANOMALY_THRESHOLD)[0]\n",
+    "plt.scatter(anomalies,test_predict1[anomalies,var], color='black',marker =\"o\",s=100 )\n",
+    "\n",
+    "\n",
+    "plt.title('Testing Data - Predicted vs Actual')\n",
+    "plt.xlabel('Time')\n",
+    "plt.ylabel('Value')\n",
+    "plt.legend()\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 176,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "[<matplotlib.lines.Line2D at 0x1d334841450>]"
+      ]
+     },
+     "execution_count": 176,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "var = 0\n",
+    "plt.plot((test_predict1 - y_test)[:,var])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 72,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Feature names in the same order as the columns of the model arrays\n",
+    "# (the selected frame columns without 'date').\n",
+    "params = [\n",
+    "    'hp_hws_temp',\n",
+    "    'rtu_003_sa_temp',\n",
+    "    'rtu_003_oadmpr_pct',\n",
+    "    'rtu_003_ra_temp',\n",
+    "    'rtu_003_oa_temp',\n",
+    "    'rtu_003_ma_temp',\n",
+    "    'rtu_003_sf_vfd_spd_fbk_tn',\n",
+    "    'rtu_003_rf_vfd_spd_fbk_tn',\n",
+    "    'wifi_third_south',\n",
+    "    'wifi_fourth_south',\n",
+    "    'air_temp_set_1',\n",
+    "    'air_temp_set_2',\n",
+    "    'dew_point_temperature_set_1d',\n",
+    "    'relative_humidity_set_1',\n",
+    "    'solar_radiation_set_1',\n",
+    "]\n",
+    "\n",
+    "# Lookup from column position to feature name.\n",
+    "idx_2_params = dict(enumerate(params))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 73,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "{0: 'hp_hws_temp',\n",
+       " 1: 'rtu_003_sa_temp',\n",
+       " 2: 'rtu_003_oadmpr_pct',\n",
+       " 3: 'rtu_003_ra_temp',\n",
+       " 4: 'rtu_003_oa_temp',\n",
+       " 5: 'rtu_003_ma_temp',\n",
+       " 6: 'rtu_003_sf_vfd_spd_fbk_tn',\n",
+       " 7: 'rtu_003_rf_vfd_spd_fbk_tn',\n",
+       " 8: 'wifi_third_south',\n",
+       " 9: 'wifi_fourth_south',\n",
+       " 10: 'air_temp_set_1',\n",
+       " 11: 'air_temp_set_2',\n",
+       " 12: 'dew_point_temperature_set_1d',\n",
+       " 13: 'relative_humidity_set_1',\n",
+       " 14: 'solar_radiation_set_1'}"
+      ]
+     },
+     "execution_count": 73,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "idx_2_params"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## K-Means clustering of the LSTM prediction residuals"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 194,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from sklearn.cluster import KMeans\n",
+    "import numpy as np\n",
+    "import matplotlib.pyplot as plt\n",
+    "from sklearn.decomposition import PCA\n",
+    "\n",
+    "# Seed NumPy's global RNG before clustering.\n",
+    "np.random.seed(0)\n",
+    "\n",
+    "# Prediction residuals on the test windows, projected onto two principal components.\n",
+    "pca = PCA(n_components=2)\n",
+    "X = pca.fit_transform(test_predict1 - y_test)\n",
+    "\n",
+    "# Cluster the projected residuals into k groups.\n",
+    "k = 3\n",
+    "kmeans = KMeans(n_clusters=k)\n",
+    "labels = kmeans.fit_predict(X)\n",
+    "centroids = kmeans.cluster_centers_\n",
+    "\n",
+    "# Scatter the projected residuals coloured by cluster, with centroids as red crosses.\n",
+    "plt.scatter(X[:, 0], X[:, 1], c=labels, cmap='viridis', alpha=0.5, edgecolors='k')\n",
+    "plt.scatter(centroids[:, 0], centroids[:, 1], marker='x', c='red', s=200, linewidths=2)\n",
+    "plt.title('KMeans Clustering')\n",
+    "plt.xlabel('Feature 1')\n",
+    "plt.ylabel('Feature 2')\n",
+    "plt.show()\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 109,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "[<matplotlib.lines.Line2D at 0x1d33ccad250>]"
+      ]
+     },
+     "execution_count": 109,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "plt.plot((test_predict1 - y_test)[:,2])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 77,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "array([[ 5.8607887e-02, -2.4713947e-01,  2.4978706e-01, -7.8289807e-01,\n",
+       "        -2.0218764e-01, -2.8860569e-01,  2.7817219e-01,  2.4209845e-01],\n",
+       "       [-2.6845999e-02,  1.2596852e-01,  9.6294099e-01,  2.0099232e-01,\n",
+       "         3.3391420e-02,  7.7613303e-04, -7.1204931e-02, -9.7836025e-02]],\n",
+       "      dtype=float32)"
+      ]
+     },
+     "execution_count": 77,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "pca.components_"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 204,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "k = 60     # window length in samples\n",
+    "step = 30  # hop between window starts, so consecutive windows overlap by k - step samples\n",
+    "X= test_predict1 - y_test\n",
+    "feat_df = pd.DataFrame(columns=[\"mean\",\"std\",])  # not used in this cell\n",
+    "\n",
+    "# One feature row per window: per-column mean, std, max, min and interquartile range,\n",
+    "# concatenated in that order. Local names avoid rebinding the builtins `max` and `min`,\n",
+    "# which would break later max()/min() calls in the same kernel.\n",
+    "window_rows = []\n",
+    "for start in range(0, len(X), step):\n",
+    "    window = X[start:start + k]\n",
+    "    q75, q25 = np.percentile(window, [75, 25], axis=0)\n",
+    "    window_rows.append(np.concatenate([\n",
+    "        window.mean(axis=0),\n",
+    "        window.std(axis=0),\n",
+    "        window.max(axis=0),\n",
+    "        window.min(axis=0),\n",
+    "        q75 - q25,\n",
+    "    ]))\n",
+    "# Shape: (number of windows, 5 * number of residual columns).\n",
+    "processed_data = np.stack(window_rows)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 197,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Windowed residual statistics, reduced to two principal components for clustering.\n",
+    "X = processed_data\n",
+    "\n",
+    "pca = PCA(n_components=2)\n",
+    "X = pca.fit_transform(X)\n",
+    "\n",
+    "\n",
+    "k = 4\n",
+    "\n",
+    "# Pin the seed on the estimator so the clustering is reproducible when this cell is\n",
+    "# re-run on its own, instead of depending on the current state of NumPy's global RNG.\n",
+    "kmeans = KMeans(n_clusters=k, random_state=0)\n",
+    "\n",
+    "kmeans.fit(X)\n",
+    "\n",
+    "# Getting the cluster centers and labels\n",
+    "centroids = kmeans.cluster_centers_\n",
+    "labels = kmeans.labels_\n",
+    "\n",
+    "# Plotting the data points and cluster centers\n",
+    "plt.scatter(X[:, 0], X[:, 1], c=labels, cmap='viridis', alpha=0.5)\n",
+    "plt.scatter(centroids[:, 0], centroids[:, 1], marker='x', c='red', s=200, linewidths=2)\n",
+    "plt.title('KMeans Clustering')\n",
+    "plt.xlabel('Feature 1')\n",
+    "plt.ylabel('Feature 2')\n",
+    "plt.show()\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 167,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "\n",
+    "# Exponentially weighted mean (com=1000) of every column except the timestamp.\n",
+    "dd = df_filtered.drop(\"date\", axis=1)\n",
+    "dg = dd.ewm(com=1000, adjust=True).mean()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 168,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>date</th>\n",
+       "      <th>hp_hws_temp</th>\n",
+       "      <th>rtu_003_sa_temp</th>\n",
+       "      <th>rtu_003_oadmpr_pct</th>\n",
+       "      <th>rtu_003_ra_temp</th>\n",
+       "      <th>rtu_003_oa_temp</th>\n",
+       "      <th>rtu_003_ma_temp</th>\n",
+       "      <th>rtu_003_sf_vfd_spd_fbk_tn</th>\n",
+       "      <th>rtu_003_rf_vfd_spd_fbk_tn</th>\n",
+       "      <th>wifi_third_south</th>\n",
+       "      <th>wifi_fourth_south</th>\n",
+       "      <th>air_temp_set_1</th>\n",
+       "      <th>air_temp_set_2</th>\n",
+       "      <th>dew_point_temperature_set_1d</th>\n",
+       "      <th>relative_humidity_set_1</th>\n",
+       "      <th>solar_radiation_set_1</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>555845</th>\n",
+       "      <td>2019-04-02 00:00:00</td>\n",
+       "      <td>120.7</td>\n",
+       "      <td>66.7</td>\n",
+       "      <td>84.4</td>\n",
+       "      <td>72.2</td>\n",
+       "      <td>59.2</td>\n",
+       "      <td>66.7</td>\n",
+       "      <td>79.6</td>\n",
+       "      <td>53.7</td>\n",
+       "      <td>34.0</td>\n",
+       "      <td>31.0</td>\n",
+       "      <td>15.67</td>\n",
+       "      <td>14.92</td>\n",
+       "      <td>11.77</td>\n",
+       "      <td>77.80</td>\n",
+       "      <td>147.1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>555846</th>\n",
+       "      <td>2019-04-02 00:01:00</td>\n",
+       "      <td>120.4</td>\n",
+       "      <td>65.8</td>\n",
+       "      <td>85.4</td>\n",
+       "      <td>72.2</td>\n",
+       "      <td>59.5</td>\n",
+       "      <td>65.0</td>\n",
+       "      <td>78.1</td>\n",
+       "      <td>54.4</td>\n",
+       "      <td>34.0</td>\n",
+       "      <td>31.0</td>\n",
+       "      <td>15.67</td>\n",
+       "      <td>14.92</td>\n",
+       "      <td>11.77</td>\n",
+       "      <td>77.80</td>\n",
+       "      <td>147.1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>555847</th>\n",
+       "      <td>2019-04-02 00:02:00</td>\n",
+       "      <td>120.1</td>\n",
+       "      <td>65.1</td>\n",
+       "      <td>66.2</td>\n",
+       "      <td>72.1</td>\n",
+       "      <td>59.4</td>\n",
+       "      <td>64.0</td>\n",
+       "      <td>78.0</td>\n",
+       "      <td>60.1</td>\n",
+       "      <td>34.0</td>\n",
+       "      <td>31.0</td>\n",
+       "      <td>15.67</td>\n",
+       "      <td>14.92</td>\n",
+       "      <td>11.77</td>\n",
+       "      <td>77.80</td>\n",
+       "      <td>147.1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>555848</th>\n",
+       "      <td>2019-04-02 00:03:00</td>\n",
+       "      <td>119.6</td>\n",
+       "      <td>64.9</td>\n",
+       "      <td>56.0</td>\n",
+       "      <td>72.2</td>\n",
+       "      <td>59.4</td>\n",
+       "      <td>65.7</td>\n",
+       "      <td>79.1</td>\n",
+       "      <td>55.5</td>\n",
+       "      <td>34.0</td>\n",
+       "      <td>31.0</td>\n",
+       "      <td>15.67</td>\n",
+       "      <td>14.92</td>\n",
+       "      <td>11.77</td>\n",
+       "      <td>77.80</td>\n",
+       "      <td>147.1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>555849</th>\n",
+       "      <td>2019-04-02 00:04:00</td>\n",
+       "      <td>119.3</td>\n",
+       "      <td>65.5</td>\n",
+       "      <td>54.6</td>\n",
+       "      <td>72.0</td>\n",
+       "      <td>59.2</td>\n",
+       "      <td>67.1</td>\n",
+       "      <td>75.2</td>\n",
+       "      <td>53.1</td>\n",
+       "      <td>34.0</td>\n",
+       "      <td>31.0</td>\n",
+       "      <td>15.67</td>\n",
+       "      <td>14.92</td>\n",
+       "      <td>11.77</td>\n",
+       "      <td>77.80</td>\n",
+       "      <td>147.1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>...</th>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1080190</th>\n",
+       "      <td>2020-02-14 23:57:00</td>\n",
+       "      <td>121.9</td>\n",
+       "      <td>67.3</td>\n",
+       "      <td>52.8</td>\n",
+       "      <td>73.2</td>\n",
+       "      <td>63.5</td>\n",
+       "      <td>69.2</td>\n",
+       "      <td>80.9</td>\n",
+       "      <td>61.3</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>16.42</td>\n",
+       "      <td>13.93</td>\n",
+       "      <td>6.93</td>\n",
+       "      <td>53.66</td>\n",
+       "      <td>347.9</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1080191</th>\n",
+       "      <td>2020-02-14 23:58:00</td>\n",
+       "      <td>122.7</td>\n",
+       "      <td>69.2</td>\n",
+       "      <td>64.8</td>\n",
+       "      <td>73.3</td>\n",
+       "      <td>63.4</td>\n",
+       "      <td>70.0</td>\n",
+       "      <td>81.0</td>\n",
+       "      <td>53.8</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>16.42</td>\n",
+       "      <td>13.93</td>\n",
+       "      <td>6.93</td>\n",
+       "      <td>53.66</td>\n",
+       "      <td>347.9</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1080192</th>\n",
+       "      <td>2020-02-14 23:58:00</td>\n",
+       "      <td>122.7</td>\n",
+       "      <td>69.2</td>\n",
+       "      <td>64.8</td>\n",
+       "      <td>73.3</td>\n",
+       "      <td>63.4</td>\n",
+       "      <td>70.0</td>\n",
+       "      <td>81.0</td>\n",
+       "      <td>53.8</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>16.42</td>\n",
+       "      <td>13.93</td>\n",
+       "      <td>6.93</td>\n",
+       "      <td>53.66</td>\n",
+       "      <td>347.9</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1080193</th>\n",
+       "      <td>2020-02-14 23:59:00</td>\n",
+       "      <td>122.9</td>\n",
+       "      <td>68.7</td>\n",
+       "      <td>80.8</td>\n",
+       "      <td>73.3</td>\n",
+       "      <td>63.1</td>\n",
+       "      <td>67.3</td>\n",
+       "      <td>82.2</td>\n",
+       "      <td>60.1</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>16.42</td>\n",
+       "      <td>13.93</td>\n",
+       "      <td>6.93</td>\n",
+       "      <td>53.66</td>\n",
+       "      <td>347.9</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1080194</th>\n",
+       "      <td>2020-02-14 23:59:00</td>\n",
+       "      <td>122.9</td>\n",
+       "      <td>68.7</td>\n",
+       "      <td>80.8</td>\n",
+       "      <td>73.3</td>\n",
+       "      <td>63.1</td>\n",
+       "      <td>67.3</td>\n",
+       "      <td>82.2</td>\n",
+       "      <td>60.1</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>16.42</td>\n",
+       "      <td>13.93</td>\n",
+       "      <td>6.93</td>\n",
+       "      <td>53.66</td>\n",
+       "      <td>347.9</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "<p>524350 rows × 16 columns</p>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "                       date  hp_hws_temp  rtu_003_sa_temp  rtu_003_oadmpr_pct  \\\n",
+       "555845  2019-04-02 00:00:00        120.7             66.7                84.4   \n",
+       "555846  2019-04-02 00:01:00        120.4             65.8                85.4   \n",
+       "555847  2019-04-02 00:02:00        120.1             65.1                66.2   \n",
+       "555848  2019-04-02 00:03:00        119.6             64.9                56.0   \n",
+       "555849  2019-04-02 00:04:00        119.3             65.5                54.6   \n",
+       "...                     ...          ...              ...                 ...   \n",
+       "1080190 2020-02-14 23:57:00        121.9             67.3                52.8   \n",
+       "1080191 2020-02-14 23:58:00        122.7             69.2                64.8   \n",
+       "1080192 2020-02-14 23:58:00        122.7             69.2                64.8   \n",
+       "1080193 2020-02-14 23:59:00        122.9             68.7                80.8   \n",
+       "1080194 2020-02-14 23:59:00        122.9             68.7                80.8   \n",
+       "\n",
+       "         rtu_003_ra_temp  rtu_003_oa_temp  rtu_003_ma_temp  \\\n",
+       "555845              72.2             59.2             66.7   \n",
+       "555846              72.2             59.5             65.0   \n",
+       "555847              72.1             59.4             64.0   \n",
+       "555848              72.2             59.4             65.7   \n",
+       "555849              72.0             59.2             67.1   \n",
+       "...                  ...              ...              ...   \n",
+       "1080190             73.2             63.5             69.2   \n",
+       "1080191             73.3             63.4             70.0   \n",
+       "1080192             73.3             63.4             70.0   \n",
+       "1080193             73.3             63.1             67.3   \n",
+       "1080194             73.3             63.1             67.3   \n",
+       "\n",
+       "         rtu_003_sf_vfd_spd_fbk_tn  rtu_003_rf_vfd_spd_fbk_tn  \\\n",
+       "555845                        79.6                       53.7   \n",
+       "555846                        78.1                       54.4   \n",
+       "555847                        78.0                       60.1   \n",
+       "555848                        79.1                       55.5   \n",
+       "555849                        75.2                       53.1   \n",
+       "...                            ...                        ...   \n",
+       "1080190                       80.9                       61.3   \n",
+       "1080191                       81.0                       53.8   \n",
+       "1080192                       81.0                       53.8   \n",
+       "1080193                       82.2                       60.1   \n",
+       "1080194                       82.2                       60.1   \n",
+       "\n",
+       "         wifi_third_south  wifi_fourth_south  air_temp_set_1  air_temp_set_2  \\\n",
+       "555845               34.0               31.0           15.67           14.92   \n",
+       "555846               34.0               31.0           15.67           14.92   \n",
+       "555847               34.0               31.0           15.67           14.92   \n",
+       "555848               34.0               31.0           15.67           14.92   \n",
+       "555849               34.0               31.0           15.67           14.92   \n",
+       "...                   ...                ...             ...             ...   \n",
+       "1080190               0.0                0.0           16.42           13.93   \n",
+       "1080191               0.0                0.0           16.42           13.93   \n",
+       "1080192               0.0                0.0           16.42           13.93   \n",
+       "1080193               0.0                0.0           16.42           13.93   \n",
+       "1080194               0.0                0.0           16.42           13.93   \n",
+       "\n",
+       "         dew_point_temperature_set_1d  relative_humidity_set_1  \\\n",
+       "555845                          11.77                    77.80   \n",
+       "555846                          11.77                    77.80   \n",
+       "555847                          11.77                    77.80   \n",
+       "555848                          11.77                    77.80   \n",
+       "555849                          11.77                    77.80   \n",
+       "...                               ...                      ...   \n",
+       "1080190                          6.93                    53.66   \n",
+       "1080191                          6.93                    53.66   \n",
+       "1080192                          6.93                    53.66   \n",
+       "1080193                          6.93                    53.66   \n",
+       "1080194                          6.93                    53.66   \n",
+       "\n",
+       "         solar_radiation_set_1  \n",
+       "555845                   147.1  \n",
+       "555846                   147.1  \n",
+       "555847                   147.1  \n",
+       "555848                   147.1  \n",
+       "555849                   147.1  \n",
+       "...                        ...  \n",
+       "1080190                  347.9  \n",
+       "1080191                  347.9  \n",
+       "1080192                  347.9  \n",
+       "1080193                  347.9  \n",
+       "1080194                  347.9  \n",
+       "\n",
+       "[524350 rows x 16 columns]"
+      ]
+     },
+     "execution_count": 168,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "df_filtered"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 170,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "[<matplotlib.lines.Line2D at 0x1d34127ba90>]"
+      ]
+     },
+     "execution_count": 170,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Overlay the hp_hws_temp column of dd and dg on one axis; the legend tells the two lines apart.\n",
+    "plt.plot(dd[\"hp_hws_temp\"], label=\"dd\")\n",
+    "plt.plot(dg[\"hp_hws_temp\"], label=\"dg\")\n",
+    "plt.ylabel(\"hp_hws_temp\")\n",
+    "plt.legend()\n",
+    "# plt.show() renders the figure and keeps the Line2D repr out of the cell output\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 202,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from sklearn.mixture import GaussianMixture\n",
+    "import numpy as np\n",
+    "import matplotlib.pyplot as plt\n",
+    "\n",
+    "# Seed NumPy's global RNG (kept from the original cell; later cells may rely on it)\n",
+    "np.random.seed(0)\n",
+    "\n",
+    "# Use processed_data as the GMM input. np.asarray lets the positional slicing in the\n",
+    "# scatter call below work whether processed_data is an ndarray or a DataFrame.\n",
+    "X = np.asarray(processed_data)\n",
+    "\n",
+    "# Two-component GMM; random_state pins the initialisation explicitly instead of\n",
+    "# depending on whatever state the global RNG happens to be in when the cell runs.\n",
+    "gmm = GaussianMixture(n_components=2, random_state=0)\n",
+    "\n",
+    "# Fit the mixture, then assign every row to its most likely component\n",
+    "gmm.fit(X)\n",
+    "labels = gmm.predict(X)\n",
+    "\n",
+    "# Scatter the first two feature columns, coloured by cluster label\n",
+    "fig, ax = plt.subplots()\n",
+    "ax.scatter(X[:, 0], X[:, 1], c=labels, cmap='viridis', alpha=0.5)\n",
+    "ax.set_title('GMM Clustering')\n",
+    "ax.set_xlabel('Feature 1')\n",
+    "ax.set_ylabel('Feature 2')\n",
+    "plt.show()"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": null,