{ "cells": [ { "cell_type": "code", "execution_count": 181, "metadata": {}, "outputs": [], "source": [ "import pandas as pd \n", "from datetime import datetime \n", "from datetime import date\n", "import matplotlib.pyplot as plt\n", "import seaborn as sns\n", "import numpy as np\n", "import pandas as pd\n", "from keras.models import Sequential\n", "from keras.layers import LSTM, Dense\n", "from sklearn.model_selection import train_test_split\n", "from sklearn.preprocessing import MinMaxScaler,StandardScaler\n", "from keras.callbacks import ModelCheckpoint\n" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
datezone_047_hw_valvertu_004_sat_sp_tnzone_047_tempzone_047_fan_spdrtu_004_fltrd_sa_flow_tnrtu_004_sa_temprtu_004_pa_static_stpt_tnrtu_004_oa_flow_tnrtu_004_oadmpr_pct...zone_047_heating_spUnnamed: 47_yhvac_Shp_hws_temparu_001_cwr_temparu_001_cws_fr_gpmaru_001_cws_temparu_001_hwr_temparu_001_hws_fr_gpmaru_001_hws_temp
02018-01-01 00:00:00100.069.067.520.09265.60466.10.060.00000028.0...NaNNaNNaN75.3NaNNaNNaNNaNNaNNaN
12018-01-01 00:01:00100.069.067.520.09265.60466.00.066572.09916228.0...NaNNaNNaN75.3NaNNaNNaNNaNNaNNaN
22018-01-01 00:02:00100.069.067.520.09708.24066.10.067628.83254228.0...NaNNaNNaN75.3NaNNaNNaNNaNNaNNaN
32018-01-01 00:03:00100.069.067.520.09611.63866.10.067710.29461728.0...NaNNaNNaN75.3NaNNaNNaNNaNNaNNaN
42018-01-01 00:04:00100.069.067.520.09215.11066.00.067139.18409028.0...NaNNaNNaN75.3NaNNaNNaNNaNNaNNaN
..................................................................
20721492020-12-31 23:58:00100.068.063.220.018884.83464.40.062938.32000023.4...71.069.023.145000123.856.2554.7156.4123.4261.6122.36
20721502020-12-31 23:58:00100.068.063.220.018884.83464.40.062938.32000023.4...71.069.023.145000123.856.2554.7156.4123.4261.6122.36
20721512020-12-31 23:59:00100.068.063.220.019345.50864.30.063154.39000023.4...71.069.023.145000123.856.2554.7156.4123.4261.6122.36
20721522020-12-31 23:59:00100.068.063.220.019345.50864.30.063154.39000023.4...71.069.023.145000123.856.2554.7156.4123.4261.6122.36
20721532021-01-01 00:00:00100.068.063.220.018650.23264.10.063076.27000022.9...71.069.023.788947123.856.2554.7156.4123.4261.6122.36
\n", "

2072154 rows × 30 columns

\n", "
" ], "text/plain": [ " date zone_047_hw_valve rtu_004_sat_sp_tn \\\n", "0 2018-01-01 00:00:00 100.0 69.0 \n", "1 2018-01-01 00:01:00 100.0 69.0 \n", "2 2018-01-01 00:02:00 100.0 69.0 \n", "3 2018-01-01 00:03:00 100.0 69.0 \n", "4 2018-01-01 00:04:00 100.0 69.0 \n", "... ... ... ... \n", "2072149 2020-12-31 23:58:00 100.0 68.0 \n", "2072150 2020-12-31 23:58:00 100.0 68.0 \n", "2072151 2020-12-31 23:59:00 100.0 68.0 \n", "2072152 2020-12-31 23:59:00 100.0 68.0 \n", "2072153 2021-01-01 00:00:00 100.0 68.0 \n", "\n", " zone_047_temp zone_047_fan_spd rtu_004_fltrd_sa_flow_tn \\\n", "0 67.5 20.0 9265.604 \n", "1 67.5 20.0 9265.604 \n", "2 67.5 20.0 9708.240 \n", "3 67.5 20.0 9611.638 \n", "4 67.5 20.0 9215.110 \n", "... ... ... ... \n", "2072149 63.2 20.0 18884.834 \n", "2072150 63.2 20.0 18884.834 \n", "2072151 63.2 20.0 19345.508 \n", "2072152 63.2 20.0 19345.508 \n", "2072153 63.2 20.0 18650.232 \n", "\n", " rtu_004_sa_temp rtu_004_pa_static_stpt_tn rtu_004_oa_flow_tn \\\n", "0 66.1 0.06 0.000000 \n", "1 66.0 0.06 6572.099162 \n", "2 66.1 0.06 7628.832542 \n", "3 66.1 0.06 7710.294617 \n", "4 66.0 0.06 7139.184090 \n", "... ... ... ... \n", "2072149 64.4 0.06 2938.320000 \n", "2072150 64.4 0.06 2938.320000 \n", "2072151 64.3 0.06 3154.390000 \n", "2072152 64.3 0.06 3154.390000 \n", "2072153 64.1 0.06 3076.270000 \n", "\n", " rtu_004_oadmpr_pct ... zone_047_heating_sp Unnamed: 47_y \\\n", "0 28.0 ... NaN NaN \n", "1 28.0 ... NaN NaN \n", "2 28.0 ... NaN NaN \n", "3 28.0 ... NaN NaN \n", "4 28.0 ... NaN NaN \n", "... ... ... ... ... \n", "2072149 23.4 ... 71.0 69.0 \n", "2072150 23.4 ... 71.0 69.0 \n", "2072151 23.4 ... 71.0 69.0 \n", "2072152 23.4 ... 71.0 69.0 \n", "2072153 22.9 ... 71.0 69.0 \n", "\n", " hvac_S hp_hws_temp aru_001_cwr_temp aru_001_cws_fr_gpm \\\n", "0 NaN 75.3 NaN NaN \n", "1 NaN 75.3 NaN NaN \n", "2 NaN 75.3 NaN NaN \n", "3 NaN 75.3 NaN NaN \n", "4 NaN 75.3 NaN NaN \n", "... ... ... ... ... \n", "2072149 23.145000 123.8 56.25 54.71 \n", "2072150 23.145000 123.8 56.25 54.71 \n", "2072151 23.145000 123.8 56.25 54.71 \n", "2072152 23.145000 123.8 56.25 54.71 \n", "2072153 23.788947 123.8 56.25 54.71 \n", "\n", " aru_001_cws_temp aru_001_hwr_temp aru_001_hws_fr_gpm \\\n", "0 NaN NaN NaN \n", "1 NaN NaN NaN \n", "2 NaN NaN NaN \n", "3 NaN NaN NaN \n", "4 NaN NaN NaN \n", "... ... ... ... \n", "2072149 56.4 123.42 61.6 \n", "2072150 56.4 123.42 61.6 \n", "2072151 56.4 123.42 61.6 \n", "2072152 56.4 123.42 61.6 \n", "2072153 56.4 123.42 61.6 \n", "\n", " aru_001_hws_temp \n", "0 NaN \n", "1 NaN \n", "2 NaN \n", "3 NaN \n", "4 NaN \n", "... ... 
\n", "2072149 122.36 \n", "2072150 122.36 \n", "2072151 122.36 \n", "2072152 122.36 \n", "2072153 122.36 \n", "\n", "[2072154 rows x 30 columns]" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "merged = pd.read_csv(r'C:\\Users\\jerin\\Downloads\\lbnlbldg59\\lbnlbldg59\\lbnlbldg59.processed\\LBNLBLDG59\\clean_Bldg59_2018to2020\\clean data\\long_merge.csv')\n", "\n", "zone = \"47\"\n", "\n", "if zone in [\"36\", \"37\", \"38\", \"39\", \"40\", \"41\", \"42\", \"64\", \"65\", \"66\", \"67\", \"68\", \"69\", \"70\"]:\n", " rtu = \"rtu_001\"\n", " wing = \"hvac_N\"\n", "elif zone in [\"18\", \"25\", \"26\", \"45\", \"48\", \"55\", \"56\", \"61\"]:\n", " rtu = \"rtu_003\"\n", " wing = \"hvac_S\"\n", "elif zone in [\"16\", \"17\", \"21\", \"22\", \"23\", \"24\", \"46\", \"47\", \"51\", \"52\", \"53\", \"54\"]:\n", " rtu = \"rtu_004\"\n", " wing = \"hvac_S\"\n", "else:\n", " rtu = \"rtu_002\"\n", " wing = \"hvac_N\"\n", "#merged is the dataframe\n", "sorted = merged[[\"date\"]+[col for col in merged.columns if zone in col or rtu in col or wing in col]+[\"hp_hws_temp\", \"aru_001_cwr_temp\" , \"aru_001_cws_fr_gpm\" ,\"aru_001_cws_temp\",\"aru_001_hwr_temp\" ,\"aru_001_hws_fr_gpm\" ,\"aru_001_hws_temp\"]]\n", "sorted" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "date 0\n", "zone_047_hw_valve 0\n", "rtu_004_sat_sp_tn 0\n", "zone_047_temp 0\n", "zone_047_fan_spd 0\n", "rtu_004_fltrd_sa_flow_tn 0\n", "rtu_004_sa_temp 0\n", "rtu_004_pa_static_stpt_tn 0\n", "rtu_004_oa_flow_tn 0\n", "rtu_004_oadmpr_pct 0\n", "rtu_004_econ_stpt_tn 0\n", "rtu_004_ra_temp 0\n", "rtu_004_oa_temp 0\n", "rtu_004_ma_temp 0\n", "rtu_004_sf_vfd_spd_fbk_tn 0\n", "rtu_004_rf_vfd_spd_fbk_tn 0\n", "rtu_004_fltrd_gnd_lvl_plenum_press_tn 0\n", "rtu_004_fltrd_lvl2_plenum_press_tn 0\n", "zone_047_cooling_sp 0\n", "Unnamed: 47_x 394570\n", "zone_047_heating_sp 0\n", "Unnamed: 47_y 394570\n", "hvac_S 13035\n", "hp_hws_temp 0\n", "aru_001_cwr_temp 524350\n", "aru_001_cws_fr_gpm 524350\n", "aru_001_cws_temp 524350\n", "aru_001_hwr_temp 299165\n", "aru_001_hws_fr_gpm 299165\n", "aru_001_hws_temp 299165\n", "dtype: int64" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "final_df = sorted.copy()\n", "final_df['date'] = pd.to_datetime(final_df['date'], format = \"%Y-%m-%d %H:%M:%S\")\n", "final_df = final_df[ (final_df.date.dt.date >date(2019, 4, 1)) & (final_df.date.dt.date< date(2020, 2, 15))]\n", "final_df.isna().sum()" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "testdataset_df = final_df[(final_df.date.dt.date date(2019, 11, 8))]\n", "\n", "testdataset = testdataset_df[['rtu_004_oa_temp','rtu_004_ra_temp','hp_hws_temp','rtu_004_oa_flow_tn','rtu_004_oadmpr_pct',\n", " 'rtu_004_sat_sp_tn','rtu_004_rf_vfd_spd_fbk_tn','rtu_004_ma_temp','rtu_004_sa_temp','rtu_004_fltrd_sa_flow_tn',\n", " 'rtu_004_sf_vfd_spd_fbk_tn']].values\n", "\n", "\n", "traindataset = traindataset_df[['rtu_004_oa_temp','rtu_004_ra_temp','hp_hws_temp','rtu_004_oa_flow_tn','rtu_004_oadmpr_pct',\n", " 'rtu_004_sat_sp_tn','rtu_004_rf_vfd_spd_fbk_tn','rtu_004_ma_temp','rtu_004_sa_temp','rtu_004_fltrd_sa_flow_tn',\n", " 'rtu_004_sf_vfd_spd_fbk_tn']].values" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "traindataset = traindataset.astype('float32')\n", "testdataset = testdataset.astype('float32')\n", "\n", "\n", "scaler = 
MinMaxScaler(feature_range=(0, 1))\n", "traindataset = scaler.fit_transform(traindataset)\n", "testdataset = scaler.transform(testdataset)" ] }, { "cell_type": "code", "execution_count": 47, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "c:\\Users\\jerin\\anaconda3\\envs\\smartbuilding\\Lib\\site-packages\\keras\\src\\layers\\rnn\\rnn.py:205: UserWarning: Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead.\n", " super().__init__(**kwargs)\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Epoch 1/10\n", "\u001b[1m3220/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 14ms/step - loss: 0.0071\n", "Epoch 1: val_loss improved from inf to 0.01145, saving model to lstm2.keras\n", "\u001b[1m3220/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m77s\u001b[0m 23ms/step - loss: 0.0071 - val_loss: 0.0115\n", "Epoch 2/10\n", "\u001b[1m3217/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m━\u001b[0m \u001b[1m0s\u001b[0m 14ms/step - loss: 0.0013\n", "Epoch 2: val_loss improved from 0.01145 to 0.01144, saving model to lstm2.keras\n", "\u001b[1m3220/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m73s\u001b[0m 23ms/step - loss: 0.0013 - val_loss: 0.0114\n", "Epoch 3/10\n", "\u001b[1m3218/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m━\u001b[0m \u001b[1m0s\u001b[0m 14ms/step - loss: 0.0010\n", "Epoch 3: val_loss improved from 0.01144 to 0.00729, saving model to lstm2.keras\n", "\u001b[1m3220/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m71s\u001b[0m 22ms/step - loss: 0.0010 - val_loss: 0.0073\n", "Epoch 4/10\n", "\u001b[1m3218/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m━\u001b[0m \u001b[1m0s\u001b[0m 14ms/step - loss: 5.5876e-04\n", "Epoch 4: val_loss improved from 0.00729 to 0.00409, saving model to lstm2.keras\n", "\u001b[1m3220/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m73s\u001b[0m 23ms/step - loss: 5.5871e-04 - val_loss: 0.0041\n", "Epoch 5/10\n", "\u001b[1m3218/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m━\u001b[0m \u001b[1m0s\u001b[0m 14ms/step - loss: 3.9261e-04\n", "Epoch 5: val_loss improved from 0.00409 to 0.00386, saving model to lstm2.keras\n", "\u001b[1m3220/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m72s\u001b[0m 22ms/step - loss: 3.9260e-04 - val_loss: 0.0039\n", "Epoch 6/10\n", "\u001b[1m3219/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m━\u001b[0m \u001b[1m0s\u001b[0m 14ms/step - loss: 3.3977e-04\n", "Epoch 6: val_loss did not improve from 0.00386\n", "\u001b[1m3220/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m70s\u001b[0m 22ms/step - loss: 3.3976e-04 - val_loss: 0.0049\n", "Epoch 7/10\n", "\u001b[1m3220/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 14ms/step - loss: 3.0365e-04\n", "Epoch 7: val_loss did not improve from 0.00386\n", "\u001b[1m3220/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m69s\u001b[0m 22ms/step - loss: 3.0364e-04 - val_loss: 0.0052\n", "Epoch 8/10\n", "\u001b[1m3220/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 14ms/step - loss: 
2.7422e-04\n", "Epoch 8: val_loss did not improve from 0.00386\n", "\u001b[1m3220/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m70s\u001b[0m 22ms/step - loss: 2.7422e-04 - val_loss: 0.0052\n", "Epoch 9/10\n", "\u001b[1m3217/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m━\u001b[0m \u001b[1m0s\u001b[0m 14ms/step - loss: 2.5380e-04\n", "Epoch 9: val_loss did not improve from 0.00386\n", "\u001b[1m3220/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m73s\u001b[0m 23ms/step - loss: 2.5379e-04 - val_loss: 0.0058\n", "Epoch 10/10\n", "\u001b[1m3219/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m━\u001b[0m \u001b[1m0s\u001b[0m 14ms/step - loss: 2.3404e-04\n", "Epoch 10: val_loss did not improve from 0.00386\n", "\u001b[1m3220/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m72s\u001b[0m 22ms/step - loss: 2.3403e-04 - val_loss: 0.0099\n" ] }, { "data": { "text/plain": [ "" ] }, "execution_count": 47, "metadata": {}, "output_type": "execute_result" } ], "source": [ "train,test = traindataset,testdataset\n", "\n", "def create_dataset(dataset,time_step):\n", " x1,x2,x3,x4,x5,x6,x7,x8,x9,Y = [],[],[],[],[],[],[],[],[],[]\n", " for i in range(len(dataset)-time_step-1):\n", " x1.append(dataset[i:(i+time_step), 0])\n", " x2.append(dataset[i:(i+time_step), 1])\n", " x3.append(dataset[i:(i+time_step), 2])\n", " x4.append(dataset[i:(i+time_step), 3])\n", " x5.append(dataset[i:(i+time_step), 4])\n", " x6.append(dataset[i:(i+time_step), 5])\n", " x7.append(dataset[i:(i+time_step), 6])\n", " x8.append(dataset[i:(i+time_step), 7])\n", " # x9.append(dataset[i:(i+time_step), 8])\n", " Y.append([dataset[i + time_step, 7]])\n", " x1,x2,x3,x4,x5,x6,x7,x8 = np.array(x1),np.array(x2),np.array(x3), np.array(x4),np.array(x5),np.array(x6),np.array(x7),np.array(x8)#,np.array(x9)\n", " Y = np.reshape(Y,(len(Y),1))\n", " return np.stack([x1,x2,x3,x4,x5,x6,x7,x8],axis=2),Y\n", "\n", "\n", "\n", "\n", "time_step = 30\n", "X_train, y_train = create_dataset(train, time_step)\n", "X_test, y_test = create_dataset(test, time_step)\n", "\n", "\n", "model = Sequential()\n", "model.add(LSTM(units=50, return_sequences=True, input_shape=(X_train.shape[1], X_train.shape[2])))\n", "model.add(LSTM(units=50, return_sequences=True))\n", "model.add(LSTM(units=30))\n", "model.add(Dense(units=1))\n", "\n", "model.compile(optimizer='adam', loss='mean_squared_error')\n", "\n", "checkpoint_path = \"lstm2.keras\"\n", "checkpoint_callback = ModelCheckpoint(filepath=checkpoint_path, monitor='val_loss', verbose=1, save_best_only=True, mode='min')\n", "model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=10, batch_size=64, verbose=1, callbacks=[checkpoint_callback])\n" ] }, { "cell_type": "code", "execution_count": 45, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Epoch 1/5\n", "\u001b[1m3218/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m━\u001b[0m \u001b[1m0s\u001b[0m 19ms/step - loss: 1.8977e-04\n", "Epoch 1: val_loss improved from inf to 0.01131, saving model to lstm2.keras\n", "\u001b[1m3220/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m94s\u001b[0m 29ms/step - loss: 1.8977e-04 - val_loss: 0.0113\n", "Epoch 2/5\n", "\u001b[1m3219/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m━\u001b[0m \u001b[1m0s\u001b[0m 18ms/step - loss: 1.7357e-04\n", "Epoch 2: val_loss did not improve from 0.01131\n", 
"\u001b[1m3220/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m91s\u001b[0m 28ms/step - loss: 1.7358e-04 - val_loss: 0.0123\n", "Epoch 3/5\n", "\u001b[1m3219/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m━\u001b[0m \u001b[1m0s\u001b[0m 18ms/step - loss: 1.6701e-04\n", "Epoch 3: val_loss did not improve from 0.01131\n", "\u001b[1m3220/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m92s\u001b[0m 28ms/step - loss: 1.6701e-04 - val_loss: 0.0127\n", "Epoch 4/5\n", "\u001b[1m3220/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 18ms/step - loss: 1.7043e-04\n", "Epoch 4: val_loss did not improve from 0.01131\n", "\u001b[1m3220/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m91s\u001b[0m 28ms/step - loss: 1.7043e-04 - val_loss: 0.0131\n", "Epoch 5/5\n", "\u001b[1m3218/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m━\u001b[0m \u001b[1m0s\u001b[0m 19ms/step - loss: 1.6319e-04\n", "Epoch 5: val_loss did not improve from 0.01131\n", "\u001b[1m3220/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m104s\u001b[0m 32ms/step - loss: 1.6319e-04 - val_loss: 0.0134\n" ] }, { "data": { "text/plain": [ "" ] }, "execution_count": 45, "metadata": {}, "output_type": "execute_result" } ], "source": [ "checkpoint_callback = ModelCheckpoint(filepath=checkpoint_path, monitor='val_loss', verbose=1, save_best_only=True, mode='min')\n", "model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=5, batch_size=64, verbose=1, callbacks=[checkpoint_callback])" ] }, { "cell_type": "code", "execution_count": 48, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\u001b[1m9900/9900\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m34s\u001b[0m 3ms/step\n" ] } ], "source": [ "# train_predict = model.predict(X_train)\n", "test_predict = model.predict(X_test)" ] }, { "cell_type": "code", "execution_count": 49, "metadata": {}, "outputs": [], "source": [ "%matplotlib qt\n", "#'rtu_004_ma_temp','rtu_004_sa_temp'\n", "var = 0\n", "plt.plot(testdataset_df['date'][31:],y_test, label='Original Testing Data', color='blue')\n", "plt.plot(testdataset_df['date'][31:],test_predict, label='Predicted Testing Data', color='red',alpha=0.8)\n", "# anomalies = np.where(abs(test_predict[:,var] - y_test[:,var]) > 0.38)[0]\n", "# plt.scatter(anomalies,test_predict[anomalies,var], color='black',marker =\"o\",s=100 )\n", "\n", "\n", "plt.title('Testing Data - Predicted vs Actual')\n", "plt.xlabel('Time')\n", "plt.ylabel('Value')\n", "plt.legend()\n", "plt.show()" ] }, { "cell_type": "code", "execution_count": 50, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "WARNING:absl:You are saving your model as an HDF5 file via `model.save()` or `keras.saving.save_model(model)`. This file format is considered legacy. We recommend using instead the native Keras format, e.g. `model.save('my_model.keras')` or `keras.saving.save_model(model, 'my_model.keras')`. 
\n" ] } ], "source": [ "from tensorflow.keras.models import load_model\n", "# model.save(\"MA_temp_model.h5\") \n", "# loaded_model = load_model(\"MA_temp_model.h5\")" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "markdown", "metadata": {}, "source": [ "ENERGY DATA" ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
dateair_temp_set_1air_temp_set_2dew_point_temperature_set_1drelative_humidity_set_1solar_radiation_set_1wifi_third_southwifi_fourth_southhvac_Nhvac_S
02018-01-01 00:00:0011.6411.518.179.0786.7NaNNaNNaNNaN
12018-01-01 00:01:0011.6411.518.179.0786.7NaNNaNNaNNaN
\n", "
" ], "text/plain": [ " date air_temp_set_1 air_temp_set_2 \\\n", "0 2018-01-01 00:00:00 11.64 11.51 \n", "1 2018-01-01 00:01:00 11.64 11.51 \n", "\n", " dew_point_temperature_set_1d relative_humidity_set_1 \\\n", "0 8.1 79.07 \n", "1 8.1 79.07 \n", "\n", " solar_radiation_set_1 wifi_third_south wifi_fourth_south hvac_N hvac_S \n", "0 86.7 NaN NaN NaN NaN \n", "1 86.7 NaN NaN NaN NaN " ] }, "execution_count": 19, "metadata": {}, "output_type": "execute_result" } ], "source": [ "zone = [\"18\", \"25\", \"26\", \"45\", \"48\", \"55\", \"56\", \"61\",\"16\", \"17\", \"21\", \"23\", \"24\", \"46\", \"47\", \"51\", \"52\", \"53\", \"54\"]\n", "rtu = [\"rtu_001\",\"rtu_002\",\"rtu_003\",\"rtu_004\"]\n", "wing = [\"hvac_N\",\"hvac_S\"]\n", "env = [\"air_temp_set_1\",\"air_temp_set_2\",\"dew_point_temperature_set_1d\",\"relative_humidity_set_1\",\"solar_radiation_set_1\"]\n", "wifi=[\"wifi_third_south\",\"wifi_fourth_south\"]\n", "# any(sub in col for sub in zone) or\n", "energy_data = merged[[\"date\"]+[col for col in merged.columns if \n", " any(sub in col for sub in env) or any(sub in col for sub in wifi)]+wing]\n", "df_filtered = energy_data[[col for col in energy_data.columns if 'Unnamed' not in col]]\n", "df_filtered = df_filtered[[col for col in df_filtered.columns if 'co2' not in col]]\n", "df_filtered = df_filtered[[col for col in df_filtered.columns if 'templogger' not in col]]\n", "# df_filtered = df_filtered.dropna()\n", "df_filtered.head(2)" ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "There are NA values in the DataFrame columns.\n" ] } ], "source": [ "df_filtered['date'] = pd.to_datetime(df_filtered['date'], format = \"%Y-%m-%d %H:%M:%S\")\n", "df_filtered = df_filtered[ (df_filtered.date.dt.date >date(2019, 4, 1)) & (df_filtered.date.dt.date< date(2020, 2, 15))]\n", "# df_filtered.isna().sum()\n", "if df_filtered.isna().any().any():\n", " print(\"There are NA values in the DataFrame columns.\")" ] }, { "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[]" ] }, "execution_count": 21, "metadata": {}, "output_type": "execute_result" } ], "source": [ "testdataset_df = df_filtered[(df_filtered.date.dt.date date(2019, 11, 8))]\n", "\n", "testdataset = testdataset_df.drop(columns=[\"date\"]).values\n", "\n", "traindataset = traindataset_df.drop(columns=[\"date\"]).values\n", "\n", "columns_with_na = traindataset_df.columns[traindataset_df.isna().any()].tolist()\n", "columns_with_na" ] }, { "cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [], "source": [ "traindataset = traindataset.astype('float32')\n", "testdataset = testdataset.astype('float32')\n", "\n", "scaler = MinMaxScaler(feature_range=(0, 1))\n", "traindataset = scaler.fit_transform(traindataset)\n", "testdataset = scaler.transform(testdataset)" ] }, { "cell_type": "code", "execution_count": 26, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Epoch 1/3\n", "\u001b[1m3219/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m━\u001b[0m \u001b[1m0s\u001b[0m 14ms/step - loss: 0.0036\n", "Epoch 1: val_loss improved from inf to 0.00068, saving model to lstm_energy_01.keras\n", "\u001b[1m3220/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m65s\u001b[0m 20ms/step - loss: 0.0036 - val_loss: 6.8049e-04\n", "Epoch 2/3\n", "\u001b[1m3219/3220\u001b[0m 
\u001b[32m━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m━\u001b[0m \u001b[1m0s\u001b[0m 14ms/step - loss: 4.8055e-04\n", "Epoch 2: val_loss improved from 0.00068 to 0.00064, saving model to lstm_energy_01.keras\n", "\u001b[1m3220/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m60s\u001b[0m 19ms/step - loss: 4.8055e-04 - val_loss: 6.4225e-04\n", "Epoch 3/3\n", "\u001b[1m3219/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m━\u001b[0m \u001b[1m0s\u001b[0m 14ms/step - loss: 4.6623e-04\n", "Epoch 3: val_loss improved from 0.00064 to 0.00061, saving model to lstm_energy_01.keras\n", "\u001b[1m3220/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m61s\u001b[0m 19ms/step - loss: 4.6622e-04 - val_loss: 6.0579e-04\n" ] }, { "data": { "text/plain": [ "" ] }, "execution_count": 26, "metadata": {}, "output_type": "execute_result" } ], "source": [ "train,test = traindataset,testdataset\n", "\n", "def create_dataset(dataset,time_step):\n", " x = [[] for _ in range(9)] \n", " Y = []\n", " for i in range(len(dataset) - time_step - 1):\n", " for j in range(9):\n", " x[j].append(dataset[i:(i + time_step), j])\n", " Y.append([dataset[i + time_step, 7],dataset[i + time_step, 8]])\n", " x= [np.array(feature_list) for feature_list in x]\n", " Y = np.reshape(Y,(len(Y),2))\n", " return np.stack(x,axis=2),Y\n", "\n", "time_step = 30\n", "X_train, y_train = create_dataset(train, time_step)\n", "X_test, y_test = create_dataset(test, time_step)\n", "\n", "\n", "model = Sequential()\n", "model.add(LSTM(units=50, return_sequences=True, input_shape=(X_train.shape[1], X_train.shape[2])))\n", "model.add(LSTM(units=50, return_sequences=True))\n", "model.add(LSTM(units=30))\n", "model.add(Dense(units=2))\n", "\n", "model.compile(optimizer='adam', loss='mean_squared_error')\n", "\n", "checkpoint_path = \"lstm_energy_01.keras\"\n", "checkpoint_callback = ModelCheckpoint(filepath=checkpoint_path, monitor='val_loss', verbose=1, save_best_only=True, mode='min')\n", "model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=3, batch_size=64, verbose=1, callbacks=[checkpoint_callback])\n" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [], "source": [ "# checkpoint_callback = ModelCheckpoint(filepath=checkpoint_path, monitor='val_loss', verbose=1, save_best_only=True, mode='min')\n", "# model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=10, batch_size=64, verbose=1, callbacks=[checkpoint_callback])" ] }, { "cell_type": "code", "execution_count": 27, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\u001b[1m6345/6345\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m22s\u001b[0m 3ms/step\n" ] } ], "source": [ "test_predict1 = model.predict(X_test)\n", "# train_predict1 = model.predict(X_train)" ] }, { "cell_type": "code", "execution_count": 30, "metadata": {}, "outputs": [], "source": [ "%matplotlib qt\n", "var = 1\n", "plt.plot(testdataset_df['date'][31:],y_test[:,1], label='Original Testing Data', color='blue')\n", "plt.plot(testdataset_df['date'][31:],test_predict1[:,1], label='Predicted Testing Data', color='red',alpha=0.8)\n", "# anomalies = np.where(abs(test_predict[:,var] - y_test[:,var]) > 0.38)[0]\n", "# plt.scatter(anomalies,test_predict[anomalies,var], color='black',marker =\"o\",s=100 )\n", "\n", "\n", "plt.title('Testing Data - Predicted vs Actual')\n", "plt.xlabel('Time')\n", "plt.ylabel('Value')\n", "plt.legend()\n", 
"plt.show()" ] }, { "cell_type": "code", "execution_count": 31, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "WARNING:absl:You are saving your model as an HDF5 file via `model.save()` or `keras.saving.save_model(model)`. This file format is considered legacy. We recommend using instead the native Keras format, e.g. `model.save('my_model.keras')` or `keras.saving.save_model(model, 'my_model.keras')`. \n" ] } ], "source": [ "# from tensorflow.keras.models import load_model\n", "# model.save(\"energy_model_01.h5\") " ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[]" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "%matplotlib qt\n", "plt.plot(df_filtered['date'],df_filtered['hvac_S'])\n", "plt.plot(df_filtered['date'],df_filtered['rtu_003_sf_vfd_spd_fbk_tn'])\n", "plt.plot(df_filtered['date'],df_filtered['zone_025_temp'])" ] }, { "cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[]" ] }, "execution_count": 22, "metadata": {}, "output_type": "execute_result" } ], "source": [ "plt.plot(merged['hvac_S'])\n", "plt.plot(testdataset_df['hvac_S'])\n", "plt.plot(traindataset_df['hvac_S'],'r')" ] }, { "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[]" ] }, "execution_count": 24, "metadata": {}, "output_type": "execute_result" } ], "source": [ "plt.plot(merged['hvac_N'])\n", "plt.plot(testdataset_df['hvac_N'])\n", "plt.plot(traindataset_df['hvac_N'],'r')" ] }, { "cell_type": "code", "execution_count": 32, "metadata": {}, "outputs": [], "source": [ "# merged.columns.to_list()" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "2.16.1\n" ] } ], "source": [] }, { "cell_type": "markdown", "metadata": {}, "source": [ "LSTM 2.0" ] }, { "cell_type": "code", "execution_count": 39, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
datehp_hws_temprtu_003_sat_sp_tnrtu_003_fltrd_sa_flow_tnrtu_003_sa_temprtu_003_pa_static_stpt_tnrtu_003_oa_flow_tnrtu_003_oadmpr_pctrtu_003_econ_stpt_tnrtu_003_ra_temp...rtu_003_rf_vfd_spd_fbk_tnrtu_003_fltrd_gnd_lvl_plenum_press_tnrtu_003_fltrd_lvl2_plenum_press_tnwifi_third_southwifi_fourth_southair_temp_set_1air_temp_set_2dew_point_temperature_set_1drelative_humidity_set_1solar_radiation_set_1
02018-01-01 00:00:0075.365.013558.53965.50.60.00000034.665.067.9...49.90.040.05NaNNaN11.6411.518.179.0786.7
12018-01-01 00:01:0075.365.013592.90965.60.65992.05957234.665.067.9...49.40.040.04NaNNaN11.6411.518.179.0786.7
\n", "

2 rows × 23 columns

\n", "
" ], "text/plain": [ " date hp_hws_temp rtu_003_sat_sp_tn \\\n", "0 2018-01-01 00:00:00 75.3 65.0 \n", "1 2018-01-01 00:01:00 75.3 65.0 \n", "\n", " rtu_003_fltrd_sa_flow_tn rtu_003_sa_temp rtu_003_pa_static_stpt_tn \\\n", "0 13558.539 65.5 0.6 \n", "1 13592.909 65.6 0.6 \n", "\n", " rtu_003_oa_flow_tn rtu_003_oadmpr_pct rtu_003_econ_stpt_tn \\\n", "0 0.000000 34.6 65.0 \n", "1 5992.059572 34.6 65.0 \n", "\n", " rtu_003_ra_temp ... rtu_003_rf_vfd_spd_fbk_tn \\\n", "0 67.9 ... 49.9 \n", "1 67.9 ... 49.4 \n", "\n", " rtu_003_fltrd_gnd_lvl_plenum_press_tn rtu_003_fltrd_lvl2_plenum_press_tn \\\n", "0 0.04 0.05 \n", "1 0.04 0.04 \n", "\n", " wifi_third_south wifi_fourth_south air_temp_set_1 air_temp_set_2 \\\n", "0 NaN NaN 11.64 11.51 \n", "1 NaN NaN 11.64 11.51 \n", "\n", " dew_point_temperature_set_1d relative_humidity_set_1 \\\n", "0 8.1 79.07 \n", "1 8.1 79.07 \n", "\n", " solar_radiation_set_1 \n", "0 86.7 \n", "1 86.7 \n", "\n", "[2 rows x 23 columns]" ] }, "execution_count": 39, "metadata": {}, "output_type": "execute_result" } ], "source": [ "rtu = [\"rtu_003\"]\n", "# wing = [\"hvac_N\",\"hvac_S\"]\n", "env = [\"air_temp_set_1\",\"air_temp_set_2\",\"dew_point_temperature_set_1d\",\"relative_humidity_set_1\",\"solar_radiation_set_1\"]\n", "wifi=[\"wifi_third_south\",\"wifi_fourth_south\"]\n", "[\"rtu_003_ma_temp\",]\n", "# any(sub in col for sub in zone) or\n", "energy_data = merged[[\"date\",\"hp_hws_temp\"]+[col for col in merged.columns if \n", " any(sub in col for sub in rtu) or any(sub in col for sub in wifi)]+env]\n", "df_filtered = energy_data[[col for col in energy_data.columns if 'Unnamed' not in col]]\n", "df_filtered = df_filtered[[col for col in df_filtered.columns if 'co2' not in col]]\n", "df_filtered = df_filtered[[col for col in df_filtered.columns if 'templogger' not in col]]\n", "# df_filtered = df_filtered.dropna()\n", "df_filtered.head(2)" ] }, { "cell_type": "code", "execution_count": 40, "metadata": {}, "outputs": [], "source": [ "df_filtered['date'] = pd.to_datetime(df_filtered['date'], format = \"%Y-%m-%d %H:%M:%S\")\n", "df_filtered = df_filtered[ (df_filtered.date.dt.date >date(2019, 4, 1)) & (df_filtered.date.dt.date< date(2020, 2, 15))]\n", "# df_filtered.isna().sum()\n", "if df_filtered.isna().any().any():\n", " print(\"There are NA values in the DataFrame columns.\")" ] }, { "cell_type": "code", "execution_count": 68, "metadata": {}, "outputs": [], "source": [ "df_filtered = df_filtered.loc[:,['date','hp_hws_temp',\n", " 'rtu_003_sa_temp',\n", " 'rtu_003_oadmpr_pct',\n", " 'rtu_003_ra_temp',\n", " 'rtu_003_oa_temp',\n", " 'rtu_003_ma_temp',\n", " 'rtu_003_sf_vfd_spd_fbk_tn',\n", " 'rtu_003_rf_vfd_spd_fbk_tn','wifi_third_south',\n", " 'wifi_fourth_south',\n", " 'air_temp_set_1',\n", " 'air_temp_set_2',\n", " 'dew_point_temperature_set_1d',\n", " 'relative_humidity_set_1',\n", " 'solar_radiation_set_1']]" ] }, { "cell_type": "code", "execution_count": 188, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[]" ] }, "execution_count": 188, "metadata": {}, "output_type": "execute_result" } ], "source": [ "testdataset_df = df_filtered[(df_filtered.date.dt.date date(2019, 11, 8))]\n", "# .ewm(com = 1000,adjust=True).mean()\n", "testdataset = testdataset_df.drop(columns=[\"date\"]).values\n", "\n", "traindataset = traindataset_df.drop(columns=[\"date\"]).values\n", "\n", "columns_with_na = traindataset_df.columns[traindataset_df.isna().any()].tolist()\n", "columns_with_na" ] }, { "cell_type": "code", "execution_count": 189, "metadata": {}, "outputs": [], 
"source": [ "traindataset = traindataset.astype('float32')\n", "testdataset = testdataset.astype('float32')\n", "\n", "scaler = StandardScaler()\n", "traindataset = scaler.fit_transform(traindataset)\n", "testdataset = scaler.transform(testdataset)" ] }, { "cell_type": "code", "execution_count": 191, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Epoch 1/5\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "c:\\Users\\jerin\\anaconda3\\envs\\smartbuilding\\Lib\\site-packages\\keras\\src\\layers\\rnn\\rnn.py:205: UserWarning: Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead.\n", " super().__init__(**kwargs)\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "\u001b[1m3217/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m━\u001b[0m \u001b[1m0s\u001b[0m 14ms/step - loss: 0.1413\n", "Epoch 1: val_loss improved from inf to 0.52256, saving model to lstm_smooth_01.keras\n", "\u001b[1m3220/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m66s\u001b[0m 20ms/step - loss: 0.1412 - val_loss: 0.5226\n", "Epoch 2/5\n", "\u001b[1m3217/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m━\u001b[0m \u001b[1m0s\u001b[0m 15ms/step - loss: 0.0393\n", "Epoch 2: val_loss improved from 0.52256 to 0.50228, saving model to lstm_smooth_01.keras\n", "\u001b[1m3220/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m65s\u001b[0m 20ms/step - loss: 0.0393 - val_loss: 0.5023\n", "Epoch 3/5\n", "\u001b[1m3220/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 15ms/step - loss: 0.0347\n", "Epoch 3: val_loss improved from 0.50228 to 0.48711, saving model to lstm_smooth_01.keras\n", "\u001b[1m3220/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m65s\u001b[0m 20ms/step - loss: 0.0347 - val_loss: 0.4871\n", "Epoch 4/5\n", "\u001b[1m3217/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m━\u001b[0m \u001b[1m0s\u001b[0m 15ms/step - loss: 0.0319\n", "Epoch 4: val_loss did not improve from 0.48711\n", "\u001b[1m3220/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m66s\u001b[0m 20ms/step - loss: 0.0319 - val_loss: 0.4958\n", "Epoch 5/5\n", "\u001b[1m3218/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m━\u001b[0m \u001b[1m0s\u001b[0m 15ms/step - loss: 0.0303\n", "Epoch 5: val_loss did not improve from 0.48711\n", "\u001b[1m3220/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m67s\u001b[0m 21ms/step - loss: 0.0303 - val_loss: 0.5026\n" ] }, { "data": { "text/plain": [ "" ] }, "execution_count": 191, "metadata": {}, "output_type": "execute_result" } ], "source": [ "train,test = traindataset,testdataset\n", "\n", "def create_dataset(dataset,time_step):\n", " x = [[] for _ in range(15)] \n", " Y = []\n", " for i in range(len(dataset) - time_step - 1):\n", " for j in range(15):\n", " x[j].append(dataset[i:(i + time_step), j])\n", " Y.append([dataset[i + time_step, 0],dataset[i + time_step, 1],dataset[i + time_step, 2],dataset[i + time_step, 3],dataset[i + time_step, 4],dataset[i + time_step, 5],\n", " dataset[i + time_step, 6],dataset[i + time_step, 7]])\n", " x= [np.array(feature_list) for feature_list in x]\n", " Y = np.reshape(Y,(len(Y),8))\n", " return np.stack(x,axis=2),Y\n", "\n", "time_step = 30\n", 
"X_train, y_train = create_dataset(train, time_step)\n", "X_test, y_test = create_dataset(test, time_step)\n", "\n", "\n", "model = Sequential()\n", "model.add(LSTM(units=50, return_sequences=True, input_shape=(X_train.shape[1], X_train.shape[2])))\n", "model.add(LSTM(units=50, return_sequences=True))\n", "model.add(LSTM(units=30))\n", "model.add(Dense(units=8))\n", "\n", "model.compile(optimizer='adam', loss='mean_squared_error')\n", "\n", "checkpoint_path = \"lstm_smooth_01.keras\"\n", "checkpoint_callback = ModelCheckpoint(filepath=checkpoint_path, monitor='val_loss', verbose=1, save_best_only=True, mode='min')\n", "model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=5, batch_size=64, verbose=1, callbacks=[checkpoint_callback])" ] }, { "cell_type": "code", "execution_count": 192, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\u001b[1m6345/6345\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m23s\u001b[0m 4ms/step\n" ] } ], "source": [ "test_predict1 = model.predict(X_test)" ] }, { "cell_type": "code", "execution_count": 193, "metadata": {}, "outputs": [], "source": [ "%matplotlib qt\n", "var = 0\n", "plt.plot(y_test[:,var], label='Original Testing Data', color='blue')\n", "plt.plot(test_predict1[:,var], label='Predicted Testing Data', color='red',alpha=0.8)\n", "anomalies = np.where(abs(test_predict1[:,var] - y_test[:,var]) > 0.38)[var]\n", "plt.scatter(anomalies,test_predict1[anomalies,var], color='black',marker =\"o\",s=100 )\n", "\n", "\n", "plt.title('Testing Data - Predicted vs Actual')\n", "plt.xlabel('Time')\n", "plt.ylabel('Value')\n", "plt.legend()\n", "plt.show()" ] }, { "cell_type": "code", "execution_count": 176, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[]" ] }, "execution_count": 176, "metadata": {}, "output_type": "execute_result" } ], "source": [ "var = 0\n", "plt.plot((test_predict1 - y_test)[:,var])" ] }, { "cell_type": "code", "execution_count": 72, "metadata": {}, "outputs": [], "source": [ "params = ['hp_hws_temp',\n", " 'rtu_003_sa_temp',\n", " 'rtu_003_oadmpr_pct',\n", " 'rtu_003_ra_temp',\n", " 'rtu_003_oa_temp',\n", " 'rtu_003_ma_temp',\n", " 'rtu_003_sf_vfd_spd_fbk_tn',\n", " 'rtu_003_rf_vfd_spd_fbk_tn','wifi_third_south',\n", " 'wifi_fourth_south',\n", " 'air_temp_set_1',\n", " 'air_temp_set_2',\n", " 'dew_point_temperature_set_1d',\n", " 'relative_humidity_set_1',\n", " 'solar_radiation_set_1']\n", "\n", "idx_2_params = {}\n", "for i, param in enumerate(params):\n", " idx_2_params[i] = param" ] }, { "cell_type": "code", "execution_count": 73, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{0: 'hp_hws_temp',\n", " 1: 'rtu_003_sa_temp',\n", " 2: 'rtu_003_oadmpr_pct',\n", " 3: 'rtu_003_ra_temp',\n", " 4: 'rtu_003_oa_temp',\n", " 5: 'rtu_003_ma_temp',\n", " 6: 'rtu_003_sf_vfd_spd_fbk_tn',\n", " 7: 'rtu_003_rf_vfd_spd_fbk_tn',\n", " 8: 'wifi_third_south',\n", " 9: 'wifi_fourth_south',\n", " 10: 'air_temp_set_1',\n", " 11: 'air_temp_set_2',\n", " 12: 'dew_point_temperature_set_1d',\n", " 13: 'relative_humidity_set_1',\n", " 14: 'solar_radiation_set_1'}" ] }, "execution_count": 73, "metadata": {}, "output_type": "execute_result" } ], "source": [ "idx_2_params" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "KMEANS" ] }, { "cell_type": "code", "execution_count": 194, "metadata": {}, "outputs": [], "source": [ "from sklearn.cluster import KMeans\n", "import numpy as np\n", "import matplotlib.pyplot as plt\n", "from sklearn.decomposition import 
PCA\n", "# Generating random data for demonstration\n", "np.random.seed(0)\n", "X = test_predict1 - y_test\n", "\n", "pca = PCA(n_components=2)\n", "X = pca.fit_transform(X)\n", "\n", "\n", "k = 3\n", "\n", "kmeans = KMeans(n_clusters=k)\n", "\n", "kmeans.fit(X)\n", "\n", "# Getting the cluster centers and labels\n", "centroids = kmeans.cluster_centers_\n", "labels = kmeans.labels_\n", "\n", "# Plotting the data points and cluster centers\n", "plt.scatter(X[:, 0], X[:, 1], c=labels, cmap='viridis', alpha=0.5, edgecolors='k')\n", "plt.scatter(centroids[:, 0], centroids[:, 1], marker='x', c='red', s=200, linewidths=2)\n", "plt.title('KMeans Clustering')\n", "plt.xlabel('Feature 1')\n", "plt.ylabel('Feature 2')\n", "plt.show()\n" ] }, { "cell_type": "code", "execution_count": 109, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[]" ] }, "execution_count": 109, "metadata": {}, "output_type": "execute_result" } ], "source": [ "plt.plot((test_predict1 - y_test)[:,2])" ] }, { "cell_type": "code", "execution_count": 77, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([[ 5.8607887e-02, -2.4713947e-01, 2.4978706e-01, -7.8289807e-01,\n", " -2.0218764e-01, -2.8860569e-01, 2.7817219e-01, 2.4209845e-01],\n", " [-2.6845999e-02, 1.2596852e-01, 9.6294099e-01, 2.0099232e-01,\n", " 3.3391420e-02, 7.7613303e-04, -7.1204931e-02, -9.7836025e-02]],\n", " dtype=float32)" ] }, "execution_count": 77, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pca.components_" ] }, { "cell_type": "code", "execution_count": 204, "metadata": {}, "outputs": [], "source": [ "k = 60\n", "X= test_predict1 - y_test\n", "processed_data = []\n", "feat_df = pd.DataFrame(columns=[\"mean\",\"std\",])\n", "for i in range(0,len(X), 30 ):\n", " mean = X[i:i+k].mean(axis = 0)\n", " std = X[i:i+k].std(axis = 0)\n", " max = X[i:i+k].max(axis = 0)\n", " min = X[i:i+k].min(axis = 0)\n", " iqr = np.percentile(X[i:i+k], 75, axis=0) - np.percentile(X[i:i+k], 25,axis=0)\n", " data = np.concatenate([mean, std, max, min, iqr])\n", " processed_data.append([data])\n", "processed_data = np.concatenate(processed_data,axis=0) " ] }, { "cell_type": "code", "execution_count": 197, "metadata": {}, "outputs": [], "source": [ "X = processed_data\n", "\n", "pca = PCA(n_components=2)\n", "X = pca.fit_transform(X)\n", "\n", "\n", "k = 4\n", "\n", "kmeans = KMeans(n_clusters=k)\n", "\n", "kmeans.fit(X)\n", "\n", "# Getting the cluster centers and labels\n", "centroids = kmeans.cluster_centers_\n", "labels = kmeans.labels_\n", "\n", "# Plotting the data points and cluster centers\n", "plt.scatter(X[:, 0], X[:, 1], c=labels, cmap='viridis', alpha=0.5)\n", "plt.scatter(centroids[:, 0], centroids[:, 1], marker='x', c='red', s=200, linewidths=2)\n", "plt.title('KMeans Clustering')\n", "plt.xlabel('Feature 1')\n", "plt.ylabel('Feature 2')\n", "plt.show()\n" ] }, { "cell_type": "code", "execution_count": 167, "metadata": {}, "outputs": [], "source": [ "\n", "dd = df_filtered.drop(columns=[\"date\"],inplace=False)\n", "dg = dd.ewm(com = 1000,adjust=True).mean()" ] }, { "cell_type": "code", "execution_count": 168, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
datehp_hws_temprtu_003_sa_temprtu_003_oadmpr_pctrtu_003_ra_temprtu_003_oa_temprtu_003_ma_temprtu_003_sf_vfd_spd_fbk_tnrtu_003_rf_vfd_spd_fbk_tnwifi_third_southwifi_fourth_southair_temp_set_1air_temp_set_2dew_point_temperature_set_1drelative_humidity_set_1solar_radiation_set_1
5558452019-04-02 00:00:00120.766.784.472.259.266.779.653.734.031.015.6714.9211.7777.80147.1
5558462019-04-02 00:01:00120.465.885.472.259.565.078.154.434.031.015.6714.9211.7777.80147.1
5558472019-04-02 00:02:00120.165.166.272.159.464.078.060.134.031.015.6714.9211.7777.80147.1
5558482019-04-02 00:03:00119.664.956.072.259.465.779.155.534.031.015.6714.9211.7777.80147.1
5558492019-04-02 00:04:00119.365.554.672.059.267.175.253.134.031.015.6714.9211.7777.80147.1
...................................................
10801902020-02-14 23:57:00121.967.352.873.263.569.280.961.30.00.016.4213.936.9353.66347.9
10801912020-02-14 23:58:00122.769.264.873.363.470.081.053.80.00.016.4213.936.9353.66347.9
10801922020-02-14 23:58:00122.769.264.873.363.470.081.053.80.00.016.4213.936.9353.66347.9
10801932020-02-14 23:59:00122.968.780.873.363.167.382.260.10.00.016.4213.936.9353.66347.9
10801942020-02-14 23:59:00122.968.780.873.363.167.382.260.10.00.016.4213.936.9353.66347.9
\n", "

524350 rows × 16 columns

\n", "
" ], "text/plain": [ " date hp_hws_temp rtu_003_sa_temp rtu_003_oadmpr_pct \\\n", "555845 2019-04-02 00:00:00 120.7 66.7 84.4 \n", "555846 2019-04-02 00:01:00 120.4 65.8 85.4 \n", "555847 2019-04-02 00:02:00 120.1 65.1 66.2 \n", "555848 2019-04-02 00:03:00 119.6 64.9 56.0 \n", "555849 2019-04-02 00:04:00 119.3 65.5 54.6 \n", "... ... ... ... ... \n", "1080190 2020-02-14 23:57:00 121.9 67.3 52.8 \n", "1080191 2020-02-14 23:58:00 122.7 69.2 64.8 \n", "1080192 2020-02-14 23:58:00 122.7 69.2 64.8 \n", "1080193 2020-02-14 23:59:00 122.9 68.7 80.8 \n", "1080194 2020-02-14 23:59:00 122.9 68.7 80.8 \n", "\n", " rtu_003_ra_temp rtu_003_oa_temp rtu_003_ma_temp \\\n", "555845 72.2 59.2 66.7 \n", "555846 72.2 59.5 65.0 \n", "555847 72.1 59.4 64.0 \n", "555848 72.2 59.4 65.7 \n", "555849 72.0 59.2 67.1 \n", "... ... ... ... \n", "1080190 73.2 63.5 69.2 \n", "1080191 73.3 63.4 70.0 \n", "1080192 73.3 63.4 70.0 \n", "1080193 73.3 63.1 67.3 \n", "1080194 73.3 63.1 67.3 \n", "\n", " rtu_003_sf_vfd_spd_fbk_tn rtu_003_rf_vfd_spd_fbk_tn \\\n", "555845 79.6 53.7 \n", "555846 78.1 54.4 \n", "555847 78.0 60.1 \n", "555848 79.1 55.5 \n", "555849 75.2 53.1 \n", "... ... ... \n", "1080190 80.9 61.3 \n", "1080191 81.0 53.8 \n", "1080192 81.0 53.8 \n", "1080193 82.2 60.1 \n", "1080194 82.2 60.1 \n", "\n", " wifi_third_south wifi_fourth_south air_temp_set_1 air_temp_set_2 \\\n", "555845 34.0 31.0 15.67 14.92 \n", "555846 34.0 31.0 15.67 14.92 \n", "555847 34.0 31.0 15.67 14.92 \n", "555848 34.0 31.0 15.67 14.92 \n", "555849 34.0 31.0 15.67 14.92 \n", "... ... ... ... ... \n", "1080190 0.0 0.0 16.42 13.93 \n", "1080191 0.0 0.0 16.42 13.93 \n", "1080192 0.0 0.0 16.42 13.93 \n", "1080193 0.0 0.0 16.42 13.93 \n", "1080194 0.0 0.0 16.42 13.93 \n", "\n", " dew_point_temperature_set_1d relative_humidity_set_1 \\\n", "555845 11.77 77.80 \n", "555846 11.77 77.80 \n", "555847 11.77 77.80 \n", "555848 11.77 77.80 \n", "555849 11.77 77.80 \n", "... ... ... \n", "1080190 6.93 53.66 \n", "1080191 6.93 53.66 \n", "1080192 6.93 53.66 \n", "1080193 6.93 53.66 \n", "1080194 6.93 53.66 \n", "\n", " solar_radiation_set_1 \n", "555845 147.1 \n", "555846 147.1 \n", "555847 147.1 \n", "555848 147.1 \n", "555849 147.1 \n", "... ... 
\n", "1080190 347.9 \n", "1080191 347.9 \n", "1080192 347.9 \n", "1080193 347.9 \n", "1080194 347.9 \n", "\n", "[524350 rows x 16 columns]" ] }, "execution_count": 168, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_filtered" ] }, { "cell_type": "code", "execution_count": 170, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[]" ] }, "execution_count": 170, "metadata": {}, "output_type": "execute_result" } ], "source": [ "plt.plot(dd[\"hp_hws_temp\"])\n", "plt.plot(dg[\"hp_hws_temp\"])" ] }, { "cell_type": "code", "execution_count": 202, "metadata": {}, "outputs": [], "source": [ "from sklearn.mixture import GaussianMixture\n", "import numpy as np\n", "import matplotlib.pyplot as plt\n", "\n", "# Generating random data for demonstration\n", "np.random.seed(0)\n", "X = processed_data\n", "\n", "# Creating the GMM instance with desired number of clusters\n", "gmm = GaussianMixture(n_components=2)\n", "\n", "# Fitting the model to the data\n", "gmm.fit(X)\n", "\n", "# Getting the cluster labels\n", "labels = gmm.predict(X)\n", "\n", "# Plotting the data points with colors representing different clusters\n", "plt.scatter(X[:, 0], X[:, 1], c=labels, cmap='viridis', alpha=0.5)\n", "plt.title('GMM Clustering')\n", "plt.xlabel('Feature 1')\n", "plt.ylabel('Feature 2')\n", "plt.show()\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "smartbuilding", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.8" } }, "nbformat": 4, "nbformat_minor": 2 }