{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd \n",
    "from datetime import datetime \n",
    "from datetime import date\n",
    "import matplotlib.pyplot as plt\n",
    "import seaborn as sns\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "from keras.models import Sequential\n",
    "from keras.layers import LSTM, Dense\n",
    "from sklearn.model_selection import train_test_split\n",
    "from sklearn.preprocessing import MinMaxScaler\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>date</th>\n",
       "      <th>zone_047_hw_valve</th>\n",
       "      <th>rtu_004_sat_sp_tn</th>\n",
       "      <th>zone_047_temp</th>\n",
       "      <th>zone_047_fan_spd</th>\n",
       "      <th>rtu_004_fltrd_sa_flow_tn</th>\n",
       "      <th>rtu_004_sa_temp</th>\n",
       "      <th>rtu_004_pa_static_stpt_tn</th>\n",
       "      <th>rtu_004_oa_flow_tn</th>\n",
       "      <th>rtu_004_oadmpr_pct</th>\n",
       "      <th>...</th>\n",
       "      <th>zone_047_heating_sp</th>\n",
       "      <th>Unnamed: 47_y</th>\n",
       "      <th>hvac_S</th>\n",
       "      <th>hp_hws_temp</th>\n",
       "      <th>aru_001_cwr_temp</th>\n",
       "      <th>aru_001_cws_fr_gpm</th>\n",
       "      <th>aru_001_cws_temp</th>\n",
       "      <th>aru_001_hwr_temp</th>\n",
       "      <th>aru_001_hws_fr_gpm</th>\n",
       "      <th>aru_001_hws_temp</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>2018-01-01 00:00:00</td>\n",
       "      <td>100.0</td>\n",
       "      <td>69.0</td>\n",
       "      <td>67.5</td>\n",
       "      <td>20.0</td>\n",
       "      <td>9265.604</td>\n",
       "      <td>66.1</td>\n",
       "      <td>0.06</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>28.0</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>75.3</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2018-01-01 00:01:00</td>\n",
       "      <td>100.0</td>\n",
       "      <td>69.0</td>\n",
       "      <td>67.5</td>\n",
       "      <td>20.0</td>\n",
       "      <td>9265.604</td>\n",
       "      <td>66.0</td>\n",
       "      <td>0.06</td>\n",
       "      <td>6572.099162</td>\n",
       "      <td>28.0</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>75.3</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2018-01-01 00:02:00</td>\n",
       "      <td>100.0</td>\n",
       "      <td>69.0</td>\n",
       "      <td>67.5</td>\n",
       "      <td>20.0</td>\n",
       "      <td>9708.240</td>\n",
       "      <td>66.1</td>\n",
       "      <td>0.06</td>\n",
       "      <td>7628.832542</td>\n",
       "      <td>28.0</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>75.3</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>2018-01-01 00:03:00</td>\n",
       "      <td>100.0</td>\n",
       "      <td>69.0</td>\n",
       "      <td>67.5</td>\n",
       "      <td>20.0</td>\n",
       "      <td>9611.638</td>\n",
       "      <td>66.1</td>\n",
       "      <td>0.06</td>\n",
       "      <td>7710.294617</td>\n",
       "      <td>28.0</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>75.3</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>2018-01-01 00:04:00</td>\n",
       "      <td>100.0</td>\n",
       "      <td>69.0</td>\n",
       "      <td>67.5</td>\n",
       "      <td>20.0</td>\n",
       "      <td>9215.110</td>\n",
       "      <td>66.0</td>\n",
       "      <td>0.06</td>\n",
       "      <td>7139.184090</td>\n",
       "      <td>28.0</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>75.3</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2072149</th>\n",
       "      <td>2020-12-31 23:58:00</td>\n",
       "      <td>100.0</td>\n",
       "      <td>68.0</td>\n",
       "      <td>63.2</td>\n",
       "      <td>20.0</td>\n",
       "      <td>18884.834</td>\n",
       "      <td>64.4</td>\n",
       "      <td>0.06</td>\n",
       "      <td>2938.320000</td>\n",
       "      <td>23.4</td>\n",
       "      <td>...</td>\n",
       "      <td>71.0</td>\n",
       "      <td>69.0</td>\n",
       "      <td>23.145000</td>\n",
       "      <td>123.8</td>\n",
       "      <td>56.25</td>\n",
       "      <td>54.71</td>\n",
       "      <td>56.4</td>\n",
       "      <td>123.42</td>\n",
       "      <td>61.6</td>\n",
       "      <td>122.36</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2072150</th>\n",
       "      <td>2020-12-31 23:58:00</td>\n",
       "      <td>100.0</td>\n",
       "      <td>68.0</td>\n",
       "      <td>63.2</td>\n",
       "      <td>20.0</td>\n",
       "      <td>18884.834</td>\n",
       "      <td>64.4</td>\n",
       "      <td>0.06</td>\n",
       "      <td>2938.320000</td>\n",
       "      <td>23.4</td>\n",
       "      <td>...</td>\n",
       "      <td>71.0</td>\n",
       "      <td>69.0</td>\n",
       "      <td>23.145000</td>\n",
       "      <td>123.8</td>\n",
       "      <td>56.25</td>\n",
       "      <td>54.71</td>\n",
       "      <td>56.4</td>\n",
       "      <td>123.42</td>\n",
       "      <td>61.6</td>\n",
       "      <td>122.36</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2072151</th>\n",
       "      <td>2020-12-31 23:59:00</td>\n",
       "      <td>100.0</td>\n",
       "      <td>68.0</td>\n",
       "      <td>63.2</td>\n",
       "      <td>20.0</td>\n",
       "      <td>19345.508</td>\n",
       "      <td>64.3</td>\n",
       "      <td>0.06</td>\n",
       "      <td>3154.390000</td>\n",
       "      <td>23.4</td>\n",
       "      <td>...</td>\n",
       "      <td>71.0</td>\n",
       "      <td>69.0</td>\n",
       "      <td>23.145000</td>\n",
       "      <td>123.8</td>\n",
       "      <td>56.25</td>\n",
       "      <td>54.71</td>\n",
       "      <td>56.4</td>\n",
       "      <td>123.42</td>\n",
       "      <td>61.6</td>\n",
       "      <td>122.36</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2072152</th>\n",
       "      <td>2020-12-31 23:59:00</td>\n",
       "      <td>100.0</td>\n",
       "      <td>68.0</td>\n",
       "      <td>63.2</td>\n",
       "      <td>20.0</td>\n",
       "      <td>19345.508</td>\n",
       "      <td>64.3</td>\n",
       "      <td>0.06</td>\n",
       "      <td>3154.390000</td>\n",
       "      <td>23.4</td>\n",
       "      <td>...</td>\n",
       "      <td>71.0</td>\n",
       "      <td>69.0</td>\n",
       "      <td>23.145000</td>\n",
       "      <td>123.8</td>\n",
       "      <td>56.25</td>\n",
       "      <td>54.71</td>\n",
       "      <td>56.4</td>\n",
       "      <td>123.42</td>\n",
       "      <td>61.6</td>\n",
       "      <td>122.36</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2072153</th>\n",
       "      <td>2021-01-01 00:00:00</td>\n",
       "      <td>100.0</td>\n",
       "      <td>68.0</td>\n",
       "      <td>63.2</td>\n",
       "      <td>20.0</td>\n",
       "      <td>18650.232</td>\n",
       "      <td>64.1</td>\n",
       "      <td>0.06</td>\n",
       "      <td>3076.270000</td>\n",
       "      <td>22.9</td>\n",
       "      <td>...</td>\n",
       "      <td>71.0</td>\n",
       "      <td>69.0</td>\n",
       "      <td>23.788947</td>\n",
       "      <td>123.8</td>\n",
       "      <td>56.25</td>\n",
       "      <td>54.71</td>\n",
       "      <td>56.4</td>\n",
       "      <td>123.42</td>\n",
       "      <td>61.6</td>\n",
       "      <td>122.36</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>2072154 rows × 30 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                        date  zone_047_hw_valve  rtu_004_sat_sp_tn  \\\n",
       "0        2018-01-01 00:00:00              100.0               69.0   \n",
       "1        2018-01-01 00:01:00              100.0               69.0   \n",
       "2        2018-01-01 00:02:00              100.0               69.0   \n",
       "3        2018-01-01 00:03:00              100.0               69.0   \n",
       "4        2018-01-01 00:04:00              100.0               69.0   \n",
       "...                      ...                ...                ...   \n",
       "2072149  2020-12-31 23:58:00              100.0               68.0   \n",
       "2072150  2020-12-31 23:58:00              100.0               68.0   \n",
       "2072151  2020-12-31 23:59:00              100.0               68.0   \n",
       "2072152  2020-12-31 23:59:00              100.0               68.0   \n",
       "2072153  2021-01-01 00:00:00              100.0               68.0   \n",
       "\n",
       "         zone_047_temp  zone_047_fan_spd  rtu_004_fltrd_sa_flow_tn  \\\n",
       "0                 67.5              20.0                  9265.604   \n",
       "1                 67.5              20.0                  9265.604   \n",
       "2                 67.5              20.0                  9708.240   \n",
       "3                 67.5              20.0                  9611.638   \n",
       "4                 67.5              20.0                  9215.110   \n",
       "...                ...               ...                       ...   \n",
       "2072149           63.2              20.0                 18884.834   \n",
       "2072150           63.2              20.0                 18884.834   \n",
       "2072151           63.2              20.0                 19345.508   \n",
       "2072152           63.2              20.0                 19345.508   \n",
       "2072153           63.2              20.0                 18650.232   \n",
       "\n",
       "         rtu_004_sa_temp  rtu_004_pa_static_stpt_tn  rtu_004_oa_flow_tn  \\\n",
       "0                   66.1                       0.06            0.000000   \n",
       "1                   66.0                       0.06         6572.099162   \n",
       "2                   66.1                       0.06         7628.832542   \n",
       "3                   66.1                       0.06         7710.294617   \n",
       "4                   66.0                       0.06         7139.184090   \n",
       "...                  ...                        ...                 ...   \n",
       "2072149             64.4                       0.06         2938.320000   \n",
       "2072150             64.4                       0.06         2938.320000   \n",
       "2072151             64.3                       0.06         3154.390000   \n",
       "2072152             64.3                       0.06         3154.390000   \n",
       "2072153             64.1                       0.06         3076.270000   \n",
       "\n",
       "         rtu_004_oadmpr_pct  ...  zone_047_heating_sp  Unnamed: 47_y  \\\n",
       "0                      28.0  ...                  NaN            NaN   \n",
       "1                      28.0  ...                  NaN            NaN   \n",
       "2                      28.0  ...                  NaN            NaN   \n",
       "3                      28.0  ...                  NaN            NaN   \n",
       "4                      28.0  ...                  NaN            NaN   \n",
       "...                     ...  ...                  ...            ...   \n",
       "2072149                23.4  ...                 71.0           69.0   \n",
       "2072150                23.4  ...                 71.0           69.0   \n",
       "2072151                23.4  ...                 71.0           69.0   \n",
       "2072152                23.4  ...                 71.0           69.0   \n",
       "2072153                22.9  ...                 71.0           69.0   \n",
       "\n",
       "            hvac_S  hp_hws_temp  aru_001_cwr_temp  aru_001_cws_fr_gpm  \\\n",
       "0              NaN         75.3               NaN                 NaN   \n",
       "1              NaN         75.3               NaN                 NaN   \n",
       "2              NaN         75.3               NaN                 NaN   \n",
       "3              NaN         75.3               NaN                 NaN   \n",
       "4              NaN         75.3               NaN                 NaN   \n",
       "...            ...          ...               ...                 ...   \n",
       "2072149  23.145000        123.8             56.25               54.71   \n",
       "2072150  23.145000        123.8             56.25               54.71   \n",
       "2072151  23.145000        123.8             56.25               54.71   \n",
       "2072152  23.145000        123.8             56.25               54.71   \n",
       "2072153  23.788947        123.8             56.25               54.71   \n",
       "\n",
       "         aru_001_cws_temp  aru_001_hwr_temp  aru_001_hws_fr_gpm  \\\n",
       "0                     NaN               NaN                 NaN   \n",
       "1                     NaN               NaN                 NaN   \n",
       "2                     NaN               NaN                 NaN   \n",
       "3                     NaN               NaN                 NaN   \n",
       "4                     NaN               NaN                 NaN   \n",
       "...                   ...               ...                 ...   \n",
       "2072149              56.4            123.42                61.6   \n",
       "2072150              56.4            123.42                61.6   \n",
       "2072151              56.4            123.42                61.6   \n",
       "2072152              56.4            123.42                61.6   \n",
       "2072153              56.4            123.42                61.6   \n",
       "\n",
       "         aru_001_hws_temp  \n",
       "0                     NaN  \n",
       "1                     NaN  \n",
       "2                     NaN  \n",
       "3                     NaN  \n",
       "4                     NaN  \n",
       "...                   ...  \n",
       "2072149            122.36  \n",
       "2072150            122.36  \n",
       "2072151            122.36  \n",
       "2072152            122.36  \n",
       "2072153            122.36  \n",
       "\n",
       "[2072154 rows x 30 columns]"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Load the merged sensor table.\n",
    "# NOTE(review): absolute, machine-specific path -- consider a configurable DATA_DIR.\n",
    "merged = pd.read_csv(r'C:\\Users\\jerin\\Downloads\\lbnlbldg59\\lbnlbldg59\\lbnlbldg59.processed\\LBNLBLDG59\\clean_Bldg59_2018to2020\\clean data\\long_merge.csv')\n",
    "\n",
    "zone = \"47\"\n",
    "\n",
    "# Pick the rtu / wing column tags for the chosen zone; zones not listed fall back to rtu_002 / hvac_N.\n",
    "if zone in [\"36\", \"37\", \"38\", \"39\", \"40\", \"41\", \"42\", \"64\", \"65\", \"66\", \"67\", \"68\", \"69\", \"70\"]:\n",
    "    rtu = \"rtu_001\"\n",
    "    wing = \"hvac_N\"\n",
    "elif zone in [\"18\", \"25\", \"26\", \"45\", \"48\", \"55\", \"56\", \"61\"]:\n",
    "    rtu = \"rtu_003\"\n",
    "    wing = \"hvac_S\"\n",
    "elif zone in [\"16\", \"17\", \"21\", \"22\", \"23\", \"24\", \"46\", \"47\", \"51\", \"52\", \"53\", \"54\"]:\n",
    "    rtu = \"rtu_004\"\n",
    "    wing = \"hvac_S\"\n",
    "else:\n",
    "    rtu = \"rtu_002\"\n",
    "    wing = \"hvac_N\"\n",
    "\n",
    "# Zone columns are named `zone_0NN_*`. Match that full prefix: a bare substring test on the\n",
    "# zone number (\"47\") also selected unrelated columns such as `Unnamed: 47_x` / `Unnamed: 47_y`.\n",
    "zone_prefix = f\"zone_{zone.zfill(3)}_\"\n",
    "# Plant-level columns that are always kept, whatever the zone.\n",
    "plant_cols = [\"hp_hws_temp\", \"aru_001_cwr_temp\", \"aru_001_cws_fr_gpm\", \"aru_001_cws_temp\",\n",
    "              \"aru_001_hwr_temp\", \"aru_001_hws_fr_gpm\", \"aru_001_hws_temp\"]\n",
    "zone_cols = [col for col in merged.columns if col.startswith(zone_prefix) or rtu in col or wing in col]\n",
    "\n",
    "# NOTE: the name `sorted` shadows the Python built-in; it is kept because later cells read it.\n",
    "sorted = merged[[\"date\"] + zone_cols + plant_cols]\n",
    "sorted"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 100,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Pearson correlation between every column except the timestamp, drawn as an annotated heatmap.\n",
    "correlation_matrix = sorted.drop(columns='date').corr()\n",
    "fig, ax = plt.subplots(figsize=(15, 10))\n",
    "sns.heatmap(correlation_matrix, annot=True, fmt=\".2f\", cmap='coolwarm', linewidths=0.5, ax=ax)\n",
    "ax.set_title('Pearson Correlation Coefficients')\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 102,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "zone_047_fan_spd ---- 0.3838842710385038 ---- zone_047_temp\n",
      "rtu_004_sa_temp ---- 0.5636316174287519 ---- zone_047_temp\n",
      "rtu_004_ra_temp ---- 0.32776265464886917 ---- zone_047_temp\n",
      "rtu_004_oa_temp ---- 0.3911499150089511 ---- zone_047_temp\n",
      "rtu_004_ma_temp ---- 0.3800818291020465 ---- zone_047_temp\n",
      "hvac_S ---- 0.3163506114497974 ---- zone_047_hw_valve\n",
      "hvac_S ---- 0.42500326788919984 ---- rtu_004_fltrd_sa_flow_tn\n",
      "hvac_S ---- 0.4794994590105312 ---- rtu_004_oa_temp\n",
      "hvac_S ---- 0.37653522078249596 ---- rtu_004_ma_temp\n",
      "hvac_S ---- 0.45054590590454646 ---- rtu_004_sf_vfd_spd_fbk_tn\n",
      "hvac_S ---- 0.3910776435479394 ---- rtu_004_rf_vfd_spd_fbk_tn\n",
      "aru_001_cwr_temp ---- 0.4337890009515319 ---- zone_047_temp\n",
      "aru_001_cwr_temp ---- 0.5103744910713975 ---- hvac_S\n",
      "aru_001_cws_fr_gpm ---- 0.5251959795850137 ---- zone_047_temp\n",
      "aru_001_cws_fr_gpm ---- 0.4816297584385553 ---- hvac_S\n",
      "aru_001_cws_temp ---- 0.576461860142355 ---- zone_047_temp\n",
      "aru_001_cws_temp ---- 0.5060071970556257 ---- hvac_S\n"
     ]
    }
   ],
   "source": [
    "# Print every column pair with |r| above the cutoff, restricted to pairs that involve\n",
    "# `zone_047_temp` or `hvac_S`, and collect the names of all columns that appear in such a pair.\n",
    "CORR_CUTOFF = 0.3\n",
    "columns_of_interest = (\"zone_047_temp\", \"hvac_S\")\n",
    "column_names = correlation_matrix.columns\n",
    "highly_correlated_cols = set()\n",
    "for row_pos, row_name in enumerate(column_names):\n",
    "    # Only the strict lower triangle is visited, so each pair is reported once.\n",
    "    for col_pos, col_name in enumerate(column_names[:row_pos]):\n",
    "        strength = abs(correlation_matrix.iloc[row_pos, col_pos])\n",
    "        involves_target = row_name in columns_of_interest or col_name in columns_of_interest\n",
    "        if strength > CORR_CUTOFF and row_name != col_name and involves_target:\n",
    "            print(row_name, \"----\", strength, \"----\", col_name)\n",
    "            highly_correlated_cols.update((row_name, col_name))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "date                                          0\n",
       "zone_047_hw_valve                             0\n",
       "rtu_004_sat_sp_tn                             0\n",
       "zone_047_temp                                 0\n",
       "zone_047_fan_spd                              0\n",
       "rtu_004_fltrd_sa_flow_tn                      0\n",
       "rtu_004_sa_temp                               0\n",
       "rtu_004_pa_static_stpt_tn                     0\n",
       "rtu_004_oa_flow_tn                            0\n",
       "rtu_004_oadmpr_pct                            0\n",
       "rtu_004_econ_stpt_tn                          0\n",
       "rtu_004_ra_temp                               0\n",
       "rtu_004_oa_temp                               0\n",
       "rtu_004_ma_temp                               0\n",
       "rtu_004_sf_vfd_spd_fbk_tn                     0\n",
       "rtu_004_rf_vfd_spd_fbk_tn                     0\n",
       "rtu_004_fltrd_gnd_lvl_plenum_press_tn         0\n",
       "rtu_004_fltrd_lvl2_plenum_press_tn            0\n",
       "zone_047_cooling_sp                           0\n",
       "Unnamed: 47_x                                 0\n",
       "zone_047_heating_sp                           0\n",
       "Unnamed: 47_y                                 0\n",
       "hvac_S                                        0\n",
       "hp_hws_temp                                   0\n",
       "aru_001_cwr_temp                         667858\n",
       "aru_001_cws_fr_gpm                       667858\n",
       "aru_001_cws_temp                         667858\n",
       "aru_001_hwr_temp                              0\n",
       "aru_001_hws_fr_gpm                            0\n",
       "aru_001_hws_temp                              0\n",
       "dtype: int64"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Parse the timestamps, then keep only rows whose calendar day lies strictly between\n",
    "# 2020-01-01 and 2020-12-30 (both boundary days are excluded).\n",
    "final_df = sorted.assign(date=pd.to_datetime(sorted['date'], format=\"%Y-%m-%d %H:%M:%S\"))\n",
    "calendar_day = final_df['date'].dt.date\n",
    "window_start, window_end = date(2020, 1, 1), date(2020, 12, 30)\n",
    "final_df = final_df[(calendar_day > window_start) & (calendar_day < window_end)]\n",
    "# Missing-value count per column.\n",
    "final_df.isna().sum()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [],
   "source": [
    "%matplotlib qt\n",
    "# Overlay the series held in positional columns 11-13 of final_df (in the saved run:\n",
    "# rtu_004_ra_temp, rtu_004_oa_temp, rtu_004_ma_temp). Each line is labelled so the\n",
    "# traces can be told apart.\n",
    "fig, ax = plt.subplots()\n",
    "for column_name in final_df.columns[11:14]:\n",
    "    ax.plot(final_df['date'], final_df[column_name], label=column_name)\n",
    "ax.set_xlabel('date')\n",
    "ax.set_ylabel('value')\n",
    "ax.legend()\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "c:\\Users\\jerin\\anaconda3\\envs\\smartbuilding\\Lib\\site-packages\\keras\\src\\layers\\rnn\\rnn.py:205: UserWarning: Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead.\n",
      "  super().__init__(**kwargs)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Epoch 1/2\n",
      "\u001b[1m12174/12174\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m811s\u001b[0m 66ms/step - loss: 0.0019 - val_loss: 9.6280e-04\n",
      "Epoch 2/2\n",
      "\u001b[1m12174/12174\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m814s\u001b[0m 67ms/step - loss: 7.9909e-04 - val_loss: 7.6609e-04\n",
      "\u001b[1m24348/24348\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m120s\u001b[0m 5ms/step\n",
      "\u001b[1m10434/10434\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m58s\u001b[0m 6ms/step\n"
     ]
    }
   ],
   "source": [
    "\n",
    "# Column order matters: create_dataset (defined in this cell) reads columns 0-2 as inputs\n",
    "# and columns 3-4 (rtu_004_ma_temp, rtu_004_sa_temp) as prediction targets.\n",
    "dataset = final_df[['rtu_004_oa_temp','rtu_004_ra_temp','hp_hws_temp','rtu_004_ma_temp','rtu_004_sa_temp']].values\n",
    "\n",
    "# Alternative feature sets tried earlier (kept for reference):\n",
    "# dataset = final_df[['hvac_S','rtu_004_ra_temp','rtu_004_oa_temp','rtu_004_ma_temp','rtu_004_fltrd_sa_flow_tn',\n",
    "#                     'rtu_004_sf_vfd_spd_fbk_tn','rtu_004_rf_vfd_spd_fbk_tn','zone_047_temp']].values\n",
    "# dataset = final_df[['rtu_004_fltrd_sa_flow_tn','rtu_004_sf_vfd_spd_fbk_tn','rtu_004_rf_vfd_spd_fbk_tn',\n",
    "#                     'rtu_004_oa_temp','rtu_004_ma_temp','zone_047_fan_spd','zone_047_hw_valve','rtu_004_sa_temp','zone_047_temp']].values\n",
    "dataset = dataset.astype('float32')\n",
    "\n",
    "# NOTE(review): the scaler is fitted on the whole series before splitting, so the test rows\n",
    "# influence the min/max used to scale the training rows.\n",
    "scaler = MinMaxScaler(feature_range=(0, 1))\n",
    "dataset = scaler.fit_transform(dataset)\n",
    "\n",
    "# The first 30% of rows is held out for testing and the remaining 70% is used for training.\n",
    "# (These sizes were previously stored under swapped names; the splits themselves are unchanged.)\n",
    "test_size = int(len(dataset)* 0.30)\n",
    "train_size = len(dataset) - test_size\n",
    "test, train = dataset[:test_size, :], dataset[test_size:, :]\n",
    "\n",
    "def create_dataset(dataset,time_step):\n",
    "    \"\"\"Cut a 2-D series into sliding input windows and next-step targets.\n",
    "\n",
    "    For every start row ``i`` the input is rows ``i .. i+time_step-1`` of columns 0-2,\n",
    "    and the target is columns 3 and 4 of row ``i+time_step``.\n",
    "\n",
    "    Returns ``(X, Y)`` where ``X`` has shape ``(n, time_step, 3)`` and ``Y`` has shape\n",
    "    ``(n, 2)``, with ``n = len(dataset) - time_step - 1``.\n",
    "    \"\"\"\n",
    "    starts = range(len(dataset) - time_step - 1)\n",
    "    # One (n, time_step) array per input column, stacked on a trailing feature axis below.\n",
    "    per_column_windows = [\n",
    "        np.array([dataset[start:start + time_step, column] for start in starts])\n",
    "        for column in (0, 1, 2)\n",
    "    ]\n",
    "    targets = np.array([[dataset[start + time_step, 3], dataset[start + time_step, 4]] for start in starts])\n",
    "    return np.stack(per_column_windows, axis=2), targets\n",
    "\n",
    "\n",
    "\n",
    "\n",
    "time_step = 60\n",
    "X_train, y_train = create_dataset(train, time_step)\n",
    "X_test, y_test = create_dataset(test, time_step)\n",
    "\n",
    "# Two stacked LSTM layers followed by a 2-unit dense head (one output per target column).\n",
    "window_shape = X_train.shape[1:]  # (time_step, n_input_columns)\n",
    "model = Sequential([\n",
    "    LSTM(units=50, return_sequences=True, input_shape=window_shape),\n",
    "    LSTM(units=50),\n",
    "    Dense(units=2),\n",
    "])\n",
    "model.compile(loss='mean_squared_error', optimizer='adam')\n",
    "\n",
    "# The held-out split doubles as validation data during training.\n",
    "model.fit(X_train, y_train, epochs=5, batch_size=64, validation_data=(X_test, y_test), verbose=1)\n",
    "\n",
    "train_predict = model.predict(X_train)\n",
    "test_predict = model.predict(X_test)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [],
   "source": [
    "%matplotlib qt\n",
    "\n",
    "# Which of the two target columns to inspect. With the forecasting cell's column order,\n",
    "# index 0 is rtu_004_ma_temp and index 1 is rtu_004_sa_temp.\n",
    "target_idx = 1\n",
    "# Absolute-error cutoff (in MinMax-scaled units) above which a point is flagged.\n",
    "ANOMALY_THRESHOLD = 0.5\n",
    "\n",
    "actual = y_test[:, target_idx]\n",
    "predicted = test_predict[:, target_idx]\n",
    "plt.plot(actual, label='Original Testing Data', color='green')\n",
    "plt.plot(predicted, label='Predicted Testing Data', color='orange')\n",
    "# Compare the prediction with the ground truth of the SAME target column; the earlier\n",
    "# version subtracted column 0 of y_test from column 1 of the prediction.\n",
    "anomalies = np.where(np.abs(predicted - actual) > ANOMALY_THRESHOLD)[0]\n",
    "plt.scatter(anomalies, predicted[anomalies], color='black', marker=\"o\", s=100)\n",
    "plt.title('Testing Data - Predicted vs Actual')\n",
    "plt.xlabel('Time')\n",
    "plt.ylabel('Value')\n",
    "plt.legend()\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "LSTM autoencoder"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "----------------------------"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 246,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "c:\\Users\\jerin\\anaconda3\\envs\\smartbuilding\\Lib\\site-packages\\keras\\src\\layers\\rnn\\rnn.py:205: UserWarning: Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead.\n",
      "  super().__init__(**kwargs)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\u001b[1m11487/24348\u001b[0m \u001b[32m━━━━━━━━━\u001b[0m\u001b[37m━━━━━━━━━━━\u001b[0m \u001b[1m2:07\u001b[0m 10ms/step"
     ]
    },
    {
     "ename": "",
     "evalue": "",
     "output_type": "error",
     "traceback": [
      "\u001b[1;31mThe Kernel crashed while executing code in the current cell or a previous cell. \n",
      "\u001b[1;31mPlease review the code in the cell(s) to identify a possible cause of the failure. \n",
      "\u001b[1;31mClick <a href='https://aka.ms/vscodeJupyterKernelCrash'>here</a> for more info. \n",
      "\u001b[1;31mView Jupyter <a href='command:jupyter.viewOutput'>log</a> for further details."
     ]
    }
   ],
   "source": [
    "\n",
    "# Feature sets tried earlier (kept for reference):\n",
    "# dataset = final_df[['zone_047_temp','hvac_S','rtu_004_sa_temp']].values\n",
    "# dataset = final_df[['hvac_S','rtu_004_ra_temp','rtu_004_oa_temp','rtu_004_ma_temp','rtu_004_fltrd_sa_flow_tn',\n",
    "#                     'rtu_004_sf_vfd_spd_fbk_tn','rtu_004_rf_vfd_spd_fbk_tn','zone_047_temp']].values\n",
    "\n",
    "# Ten columns are loaded; create_dataset (defined in this cell) reads only the first nine.\n",
    "autoencoder_columns = [\n",
    "    'rtu_004_fltrd_sa_flow_tn', 'rtu_004_sf_vfd_spd_fbk_tn', 'rtu_004_rf_vfd_spd_fbk_tn',\n",
    "    'rtu_004_oa_temp', 'rtu_004_ma_temp', 'zone_047_fan_spd', 'zone_047_hw_valve',\n",
    "    'rtu_004_ra_temp', 'rtu_004_sa_temp', 'zone_047_temp',\n",
    "]\n",
    "dataset = final_df[autoencoder_columns].to_numpy().astype('float32')\n",
    "\n",
    "# Scale every column into [0, 1].\n",
    "scaler = MinMaxScaler(feature_range=(0, 1))\n",
    "dataset = scaler.fit_transform(dataset)\n",
    "\n",
    "# The leading 30% of rows becomes the test split; the rest is used for training.\n",
    "test_size = int(len(dataset) * 0.30)\n",
    "test, train = dataset[:test_size, :], dataset[test_size:, :]\n",
    "\n",
    "def create_dataset(dataset,time_step):\n",
    "    \"\"\"Build overlapping windows for the sequence-reconstruction model.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    dataset : 2-D array with at least 9 columns; rows are consecutive time steps.\n",
    "    time_step : window length, in rows.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    X : array of shape (n, time_step, 9) holding columns 0-8 of each window.\n",
    "    Y : array of shape (n, time_step) holding column 8 of the same window.\n",
    "\n",
    "    ``n = len(dataset) - time_step - 1`` windows are produced. When that count is not\n",
    "    positive, correctly shaped empty arrays are returned.\n",
    "    \"\"\"\n",
    "    n_inputs = 9\n",
    "    data = np.asarray(dataset)\n",
    "    n_windows = len(data) - time_step - 1\n",
    "    if n_windows <= 0:\n",
    "        return (np.empty((0, time_step, n_inputs), dtype=data.dtype),\n",
    "                np.empty((0, time_step), dtype=data.dtype))\n",
    "\n",
    "    # sliding_window_view gives a zero-copy (windows, columns, time_step) view; a single copy\n",
    "    # is made when it is reordered to (windows, time_step, columns). This replaces nine\n",
    "    # per-column Python lists of small arrays.\n",
    "    windows = np.lib.stride_tricks.sliding_window_view(data[:, :n_inputs], time_step, axis=0)\n",
    "    X = np.ascontiguousarray(windows[:n_windows].transpose(0, 2, 1))\n",
    "    Y = X[:, :, n_inputs - 1].copy()\n",
    "    return X, Y\n",
    "\n",
    "\n",
    "\n",
    "\n",
    "time_step = 60\n",
    "X_train, y_train = create_dataset(train, time_step)\n",
    "X_test, y_test = create_dataset(test, time_step)\n",
    "\n",
    "\n",
    "model = Sequential()\n",
    "model.add(LSTM(units=50, return_sequences=True, input_shape=(X_train.shape[1], X_train.shape[2])))\n",
    "# model.add(LSTM(units=30))\n",
    "# model.add(Dense(units=time_step))\n",
    "\n",
    "model.compile(optimizer='adam', loss='mean_squared_error')\n",
    "\n",
    "# model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=2, batch_size=64, verbose=1)\n",
    "\n",
    "train_predict = model.predict(X_train)\n",
    "# test_predict = model.predict(X_test)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 244,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(779111, 60, 9)"
      ]
     },
     "execution_count": 244,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "X_train.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "mo"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 241,
   "metadata": {},
   "outputs": [],
   "source": [
    "%matplotlib qt\n",
    "time = 10\n",
    "mse = (y_test[time] - test_predict[0])**2\n",
    "anomalies = np.where(mse > 0.0001)[0]\n",
    "plt.plot(y_test[time], label='Original Data')\n",
    "plt.plot(test_predict[time], label='predicted Data')\n",
    "plt.scatter(anomalies,test_predict[time,anomalies], color='red', label='Anomalies')\n",
    "plt.legend()\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 242,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"font-weight: bold\">Model: \"sequential_19\"</span>\n",
       "</pre>\n"
      ],
      "text/plain": [
       "\u001b[1mModel: \"sequential_19\"\u001b[0m\n"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓\n",
       "┃<span style=\"font-weight: bold\"> Layer (type)                    </span>┃<span style=\"font-weight: bold\"> Output Shape           </span>┃<span style=\"font-weight: bold\">       Param # </span>┃\n",
       "┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩\n",
       "│ lstm_39 (<span style=\"color: #0087ff; text-decoration-color: #0087ff\">LSTM</span>)                  │ (<span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">60</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">50</span>)         │        <span style=\"color: #00af00; text-decoration-color: #00af00\">12,000</span> │\n",
       "├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
       "│ lstm_40 (<span style=\"color: #0087ff; text-decoration-color: #0087ff\">LSTM</span>)                  │ (<span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">30</span>)             │         <span style=\"color: #00af00; text-decoration-color: #00af00\">9,720</span> │\n",
       "├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
       "│ dense_26 (<span style=\"color: #0087ff; text-decoration-color: #0087ff\">Dense</span>)                │ (<span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">60</span>)             │         <span style=\"color: #00af00; text-decoration-color: #00af00\">1,860</span> │\n",
       "└─────────────────────────────────┴────────────────────────┴───────────────┘\n",
       "</pre>\n"
      ],
      "text/plain": [
       "┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓\n",
       "┃\u001b[1m \u001b[0m\u001b[1mLayer (type)                   \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mOutput Shape          \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1m      Param #\u001b[0m\u001b[1m \u001b[0m┃\n",
       "┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩\n",
       "│ lstm_39 (\u001b[38;5;33mLSTM\u001b[0m)                  │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m60\u001b[0m, \u001b[38;5;34m50\u001b[0m)         │        \u001b[38;5;34m12,000\u001b[0m │\n",
       "├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
       "│ lstm_40 (\u001b[38;5;33mLSTM\u001b[0m)                  │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m30\u001b[0m)             │         \u001b[38;5;34m9,720\u001b[0m │\n",
       "├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
       "│ dense_26 (\u001b[38;5;33mDense\u001b[0m)                │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m60\u001b[0m)             │         \u001b[38;5;34m1,860\u001b[0m │\n",
       "└─────────────────────────────────┴────────────────────────┴───────────────┘\n"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"font-weight: bold\"> Total params: </span><span style=\"color: #00af00; text-decoration-color: #00af00\">70,742</span> (276.34 KB)\n",
       "</pre>\n"
      ],
      "text/plain": [
       "\u001b[1m Total params: \u001b[0m\u001b[38;5;34m70,742\u001b[0m (276.34 KB)\n"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"font-weight: bold\"> Trainable params: </span><span style=\"color: #00af00; text-decoration-color: #00af00\">23,580</span> (92.11 KB)\n",
       "</pre>\n"
      ],
      "text/plain": [
       "\u001b[1m Trainable params: \u001b[0m\u001b[38;5;34m23,580\u001b[0m (92.11 KB)\n"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"font-weight: bold\"> Non-trainable params: </span><span style=\"color: #00af00; text-decoration-color: #00af00\">0</span> (0.00 B)\n",
       "</pre>\n"
      ],
      "text/plain": [
       "\u001b[1m Non-trainable params: \u001b[0m\u001b[38;5;34m0\u001b[0m (0.00 B)\n"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"font-weight: bold\"> Optimizer params: </span><span style=\"color: #00af00; text-decoration-color: #00af00\">47,162</span> (184.23 KB)\n",
       "</pre>\n"
      ],
      "text/plain": [
       "\u001b[1m Optimizer params: \u001b[0m\u001b[38;5;34m47,162\u001b[0m (184.23 KB)\n"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "model.summary()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 238,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([-0.00162351, -0.00127119, -0.00132501, -0.00043088, -0.00357354,\n",
       "       -0.00349951, -0.00703096, -0.00679523,  0.00243503,  0.00075096,\n",
       "        0.015773  ,  0.01411158,  0.01689583,  0.01593572,  0.00665802,\n",
       "        0.0068891 , -0.001706  , -0.00310844, -0.00871575, -0.00967073,\n",
       "       -0.00820547, -0.00687617,  0.00930512,  0.00670969,  0.00769156,\n",
       "        0.01000088, -0.00052458,  0.00010484, -0.00573552, -0.00811213,\n",
       "       -0.01016176, -0.01063424, -0.01580012, -0.01502603, -0.01243931,\n",
       "       -0.01488668,  0.00733459,  0.00564003,  0.01374102,  0.01534522,\n",
       "        0.00324941,  0.00375515, -0.0078848 , -0.00780392, -0.01223874,\n",
       "       -0.01329106, -0.00772917, -0.00823385,  0.01035273,  0.01039612,\n",
       "        0.01731664,  0.01493615,  0.00356281,  0.00522107, -0.00680918,\n",
       "       -0.00461727, -0.00997645, -0.01072395, -0.00542653, -0.00710839],\n",
       "      dtype=float32)"
      ]
     },
     "execution_count": 238,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "test_predict[0]-y_test[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "base",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.8"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}