Spaces:

smartbuildings
/

smart-buildings

Sleeping

App Files Files Community

akshayballal committed on May 1

Commit

8ced29b

•

1 Parent(s): d81a75f

Add .tf and data files to .gitignore

Browse files

Files changed (2) hide show

.gitignore +2 -0
physLSTM/full_lstm.ipynb +1190 -0

.gitignore CHANGED Viewed

@@ -2,3 +2,5 @@ venv
 .venv
 .vscode
 __pycache__/

 .venv
 .vscode
 __pycache__/
+*.tf
+data

physLSTM/full_lstm.ipynb ADDED Viewed

	@@ -0,0 +1,1190 @@

+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 56,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pandas as pd \n",
+    "from datetime import datetime \n",
+    "from datetime import date\n",
+    "import matplotlib.pyplot as plt\n",
+    "import numpy as np\n",
+    "import pandas as pd\n",
+    "from keras.models import Sequential\n",
+    "from keras.layers import LSTM, Dense\n",
+    "from sklearn.model_selection import train_test_split\n",
+    "from sklearn.preprocessing import MinMaxScaler,StandardScaler\n",
+    "from keras.callbacks import ModelCheckpoint\n",
+    "import tensorflow as tf"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 57,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "[]"
+      ]
+     },
+     "execution_count": 57,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# List the GPU devices TensorFlow can see; an empty list means none is visible to TensorFlow.\n",
+    "import tensorflow as tf\n",
+    "tf.config.list_physical_devices('GPU')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 58,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>date</th>\n",
+       "      <th>zone_047_hw_valve</th>\n",
+       "      <th>rtu_004_sat_sp_tn</th>\n",
+       "      <th>zone_047_temp</th>\n",
+       "      <th>zone_047_fan_spd</th>\n",
+       "      <th>rtu_004_fltrd_sa_flow_tn</th>\n",
+       "      <th>rtu_004_sa_temp</th>\n",
+       "      <th>rtu_004_pa_static_stpt_tn</th>\n",
+       "      <th>rtu_004_oa_flow_tn</th>\n",
+       "      <th>rtu_004_oadmpr_pct</th>\n",
+       "      <th>...</th>\n",
+       "      <th>zone_047_heating_sp</th>\n",
+       "      <th>Unnamed: 47_y</th>\n",
+       "      <th>hvac_S</th>\n",
+       "      <th>hp_hws_temp</th>\n",
+       "      <th>aru_001_cwr_temp</th>\n",
+       "      <th>aru_001_cws_fr_gpm</th>\n",
+       "      <th>aru_001_cws_temp</th>\n",
+       "      <th>aru_001_hwr_temp</th>\n",
+       "      <th>aru_001_hws_fr_gpm</th>\n",
+       "      <th>aru_001_hws_temp</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>2018-01-01 00:00:00</td>\n",
+       "      <td>100.0</td>\n",
+       "      <td>69.0</td>\n",
+       "      <td>67.5</td>\n",
+       "      <td>20.0</td>\n",
+       "      <td>9265.604</td>\n",
+       "      <td>66.1</td>\n",
+       "      <td>0.06</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>28.0</td>\n",
+       "      <td>...</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>75.3</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>2018-01-01 00:01:00</td>\n",
+       "      <td>100.0</td>\n",
+       "      <td>69.0</td>\n",
+       "      <td>67.5</td>\n",
+       "      <td>20.0</td>\n",
+       "      <td>9265.604</td>\n",
+       "      <td>66.0</td>\n",
+       "      <td>0.06</td>\n",
+       "      <td>6572.099162</td>\n",
+       "      <td>28.0</td>\n",
+       "      <td>...</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>75.3</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>2018-01-01 00:02:00</td>\n",
+       "      <td>100.0</td>\n",
+       "      <td>69.0</td>\n",
+       "      <td>67.5</td>\n",
+       "      <td>20.0</td>\n",
+       "      <td>9708.240</td>\n",
+       "      <td>66.1</td>\n",
+       "      <td>0.06</td>\n",
+       "      <td>7628.832542</td>\n",
+       "      <td>28.0</td>\n",
+       "      <td>...</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>75.3</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>2018-01-01 00:03:00</td>\n",
+       "      <td>100.0</td>\n",
+       "      <td>69.0</td>\n",
+       "      <td>67.5</td>\n",
+       "      <td>20.0</td>\n",
+       "      <td>9611.638</td>\n",
+       "      <td>66.1</td>\n",
+       "      <td>0.06</td>\n",
+       "      <td>7710.294617</td>\n",
+       "      <td>28.0</td>\n",
+       "      <td>...</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>75.3</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>2018-01-01 00:04:00</td>\n",
+       "      <td>100.0</td>\n",
+       "      <td>69.0</td>\n",
+       "      <td>67.5</td>\n",
+       "      <td>20.0</td>\n",
+       "      <td>9215.110</td>\n",
+       "      <td>66.0</td>\n",
+       "      <td>0.06</td>\n",
+       "      <td>7139.184090</td>\n",
+       "      <td>28.0</td>\n",
+       "      <td>...</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>75.3</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>...</th>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2072149</th>\n",
+       "      <td>2020-12-31 23:58:00</td>\n",
+       "      <td>100.0</td>\n",
+       "      <td>68.0</td>\n",
+       "      <td>63.2</td>\n",
+       "      <td>20.0</td>\n",
+       "      <td>18884.834</td>\n",
+       "      <td>64.4</td>\n",
+       "      <td>0.06</td>\n",
+       "      <td>2938.320000</td>\n",
+       "      <td>23.4</td>\n",
+       "      <td>...</td>\n",
+       "      <td>71.0</td>\n",
+       "      <td>69.0</td>\n",
+       "      <td>23.145000</td>\n",
+       "      <td>123.8</td>\n",
+       "      <td>56.25</td>\n",
+       "      <td>54.71</td>\n",
+       "      <td>56.4</td>\n",
+       "      <td>123.42</td>\n",
+       "      <td>61.6</td>\n",
+       "      <td>122.36</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2072150</th>\n",
+       "      <td>2020-12-31 23:58:00</td>\n",
+       "      <td>100.0</td>\n",
+       "      <td>68.0</td>\n",
+       "      <td>63.2</td>\n",
+       "      <td>20.0</td>\n",
+       "      <td>18884.834</td>\n",
+       "      <td>64.4</td>\n",
+       "      <td>0.06</td>\n",
+       "      <td>2938.320000</td>\n",
+       "      <td>23.4</td>\n",
+       "      <td>...</td>\n",
+       "      <td>71.0</td>\n",
+       "      <td>69.0</td>\n",
+       "      <td>23.145000</td>\n",
+       "      <td>123.8</td>\n",
+       "      <td>56.25</td>\n",
+       "      <td>54.71</td>\n",
+       "      <td>56.4</td>\n",
+       "      <td>123.42</td>\n",
+       "      <td>61.6</td>\n",
+       "      <td>122.36</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2072151</th>\n",
+       "      <td>2020-12-31 23:59:00</td>\n",
+       "      <td>100.0</td>\n",
+       "      <td>68.0</td>\n",
+       "      <td>63.2</td>\n",
+       "      <td>20.0</td>\n",
+       "      <td>19345.508</td>\n",
+       "      <td>64.3</td>\n",
+       "      <td>0.06</td>\n",
+       "      <td>3154.390000</td>\n",
+       "      <td>23.4</td>\n",
+       "      <td>...</td>\n",
+       "      <td>71.0</td>\n",
+       "      <td>69.0</td>\n",
+       "      <td>23.145000</td>\n",
+       "      <td>123.8</td>\n",
+       "      <td>56.25</td>\n",
+       "      <td>54.71</td>\n",
+       "      <td>56.4</td>\n",
+       "      <td>123.42</td>\n",
+       "      <td>61.6</td>\n",
+       "      <td>122.36</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2072152</th>\n",
+       "      <td>2020-12-31 23:59:00</td>\n",
+       "      <td>100.0</td>\n",
+       "      <td>68.0</td>\n",
+       "      <td>63.2</td>\n",
+       "      <td>20.0</td>\n",
+       "      <td>19345.508</td>\n",
+       "      <td>64.3</td>\n",
+       "      <td>0.06</td>\n",
+       "      <td>3154.390000</td>\n",
+       "      <td>23.4</td>\n",
+       "      <td>...</td>\n",
+       "      <td>71.0</td>\n",
+       "      <td>69.0</td>\n",
+       "      <td>23.145000</td>\n",
+       "      <td>123.8</td>\n",
+       "      <td>56.25</td>\n",
+       "      <td>54.71</td>\n",
+       "      <td>56.4</td>\n",
+       "      <td>123.42</td>\n",
+       "      <td>61.6</td>\n",
+       "      <td>122.36</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2072153</th>\n",
+       "      <td>2021-01-01 00:00:00</td>\n",
+       "      <td>100.0</td>\n",
+       "      <td>68.0</td>\n",
+       "      <td>63.2</td>\n",
+       "      <td>20.0</td>\n",
+       "      <td>18650.232</td>\n",
+       "      <td>64.1</td>\n",
+       "      <td>0.06</td>\n",
+       "      <td>3076.270000</td>\n",
+       "      <td>22.9</td>\n",
+       "      <td>...</td>\n",
+       "      <td>71.0</td>\n",
+       "      <td>69.0</td>\n",
+       "      <td>23.788947</td>\n",
+       "      <td>123.8</td>\n",
+       "      <td>56.25</td>\n",
+       "      <td>54.71</td>\n",
+       "      <td>56.4</td>\n",
+       "      <td>123.42</td>\n",
+       "      <td>61.6</td>\n",
+       "      <td>122.36</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "<p>2072154 rows × 30 columns</p>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "                        date  zone_047_hw_valve  rtu_004_sat_sp_tn  \\\n",
+       "0        2018-01-01 00:00:00              100.0               69.0   \n",
+       "1        2018-01-01 00:01:00              100.0               69.0   \n",
+       "2        2018-01-01 00:02:00              100.0               69.0   \n",
+       "3        2018-01-01 00:03:00              100.0               69.0   \n",
+       "4        2018-01-01 00:04:00              100.0               69.0   \n",
+       "...                      ...                ...                ...   \n",
+       "2072149  2020-12-31 23:58:00              100.0               68.0   \n",
+       "2072150  2020-12-31 23:58:00              100.0               68.0   \n",
+       "2072151  2020-12-31 23:59:00              100.0               68.0   \n",
+       "2072152  2020-12-31 23:59:00              100.0               68.0   \n",
+       "2072153  2021-01-01 00:00:00              100.0               68.0   \n",
+       "\n",
+       "         zone_047_temp  zone_047_fan_spd  rtu_004_fltrd_sa_flow_tn  \\\n",
+       "0                 67.5              20.0                  9265.604   \n",
+       "1                 67.5              20.0                  9265.604   \n",
+       "2                 67.5              20.0                  9708.240   \n",
+       "3                 67.5              20.0                  9611.638   \n",
+       "4                 67.5              20.0                  9215.110   \n",
+       "...                ...               ...                       ...   \n",
+       "2072149           63.2              20.0                 18884.834   \n",
+       "2072150           63.2              20.0                 18884.834   \n",
+       "2072151           63.2              20.0                 19345.508   \n",
+       "2072152           63.2              20.0                 19345.508   \n",
+       "2072153           63.2              20.0                 18650.232   \n",
+       "\n",
+       "         rtu_004_sa_temp  rtu_004_pa_static_stpt_tn  rtu_004_oa_flow_tn  \\\n",
+       "0                   66.1                       0.06            0.000000   \n",
+       "1                   66.0                       0.06         6572.099162   \n",
+       "2                   66.1                       0.06         7628.832542   \n",
+       "3                   66.1                       0.06         7710.294617   \n",
+       "4                   66.0                       0.06         7139.184090   \n",
+       "...                  ...                        ...                 ...   \n",
+       "2072149             64.4                       0.06         2938.320000   \n",
+       "2072150             64.4                       0.06         2938.320000   \n",
+       "2072151             64.3                       0.06         3154.390000   \n",
+       "2072152             64.3                       0.06         3154.390000   \n",
+       "2072153             64.1                       0.06         3076.270000   \n",
+       "\n",
+       "         rtu_004_oadmpr_pct  ...  zone_047_heating_sp  Unnamed: 47_y  \\\n",
+       "0                      28.0  ...                  NaN            NaN   \n",
+       "1                      28.0  ...                  NaN            NaN   \n",
+       "2                      28.0  ...                  NaN            NaN   \n",
+       "3                      28.0  ...                  NaN            NaN   \n",
+       "4                      28.0  ...                  NaN            NaN   \n",
+       "...                     ...  ...                  ...            ...   \n",
+       "2072149                23.4  ...                 71.0           69.0   \n",
+       "2072150                23.4  ...                 71.0           69.0   \n",
+       "2072151                23.4  ...                 71.0           69.0   \n",
+       "2072152                23.4  ...                 71.0           69.0   \n",
+       "2072153                22.9  ...                 71.0           69.0   \n",
+       "\n",
+       "            hvac_S  hp_hws_temp  aru_001_cwr_temp  aru_001_cws_fr_gpm  \\\n",
+       "0              NaN         75.3               NaN                 NaN   \n",
+       "1              NaN         75.3               NaN                 NaN   \n",
+       "2              NaN         75.3               NaN                 NaN   \n",
+       "3              NaN         75.3               NaN                 NaN   \n",
+       "4              NaN         75.3               NaN                 NaN   \n",
+       "...            ...          ...               ...                 ...   \n",
+       "2072149  23.145000        123.8             56.25               54.71   \n",
+       "2072150  23.145000        123.8             56.25               54.71   \n",
+       "2072151  23.145000        123.8             56.25               54.71   \n",
+       "2072152  23.145000        123.8             56.25               54.71   \n",
+       "2072153  23.788947        123.8             56.25               54.71   \n",
+       "\n",
+       "         aru_001_cws_temp  aru_001_hwr_temp  aru_001_hws_fr_gpm  \\\n",
+       "0                     NaN               NaN                 NaN   \n",
+       "1                     NaN               NaN                 NaN   \n",
+       "2                     NaN               NaN                 NaN   \n",
+       "3                     NaN               NaN                 NaN   \n",
+       "4                     NaN               NaN                 NaN   \n",
+       "...                   ...               ...                 ...   \n",
+       "2072149              56.4            123.42                61.6   \n",
+       "2072150              56.4            123.42                61.6   \n",
+       "2072151              56.4            123.42                61.6   \n",
+       "2072152              56.4            123.42                61.6   \n",
+       "2072153              56.4            123.42                61.6   \n",
+       "\n",
+       "         aru_001_hws_temp  \n",
+       "0                     NaN  \n",
+       "1                     NaN  \n",
+       "2                     NaN  \n",
+       "3                     NaN  \n",
+       "4                     NaN  \n",
+       "...                   ...  \n",
+       "2072149            122.36  \n",
+       "2072150            122.36  \n",
+       "2072151            122.36  \n",
+       "2072152            122.36  \n",
+       "2072153            122.36  \n",
+       "\n",
+       "[2072154 rows x 30 columns]"
+      ]
+     },
+     "execution_count": 58,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "merged = pd.read_csv(r'../data/long_merge.csv')\n",
+    "\n",
+    "zone = \"47\"\n",
+    "\n",
+    "# Each entry: (zones in the group, RTU name, wing name).\n",
+    "zone_groups = [\n",
+    "    ((\"36\", \"37\", \"38\", \"39\", \"40\", \"41\", \"42\", \"64\", \"65\", \"66\", \"67\", \"68\", \"69\", \"70\"), \"rtu_001\", \"hvac_N\"),\n",
+    "    ((\"18\", \"25\", \"26\", \"45\", \"48\", \"55\", \"56\", \"61\"), \"rtu_003\", \"hvac_S\"),\n",
+    "    ((\"16\", \"17\", \"21\", \"22\", \"23\", \"24\", \"46\", \"47\", \"51\", \"52\", \"53\", \"54\"), \"rtu_004\", \"hvac_S\"),\n",
+    "]\n",
+    "# A zone that appears in no group falls back to rtu_002 / hvac_N.\n",
+    "rtu, wing = next(\n",
+    "    ((unit, side) for members, unit, side in zone_groups if zone in members),\n",
+    "    (\"rtu_002\", \"hvac_N\"),\n",
+    ")\n",
+    "\n",
+    "# Columns that are always appended after the matched ones.\n",
+    "plant_cols = [\"hp_hws_temp\", \"aru_001_cwr_temp\", \"aru_001_cws_fr_gpm\", \"aru_001_cws_temp\",\n",
+    "              \"aru_001_hwr_temp\", \"aru_001_hws_fr_gpm\", \"aru_001_hws_temp\"]\n",
+    "\n",
+    "# Plain substring match on the column name against the zone id, the RTU name and the wing name.\n",
+    "# NOTE(review): the match is loose; the saved output shows it also picks up 'Unnamed: 47_y' for zone \"47\".\n",
+    "matched_cols = [name for name in merged.columns if any(key in name for key in (zone, rtu, wing))]\n",
+    "\n",
+    "# merged is the full dataframe; `sorted` is the per-zone column subset.\n",
+    "# NOTE(review): `sorted` shadows the Python builtin; the name is kept because other cells may reference it.\n",
+    "sorted = merged[[\"date\"] + matched_cols + plant_cols]\n",
+    "sorted"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 59,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>date</th>\n",
+       "      <th>hp_hws_temp</th>\n",
+       "      <th>rtu_003_sat_sp_tn</th>\n",
+       "      <th>rtu_003_fltrd_sa_flow_tn</th>\n",
+       "      <th>rtu_003_sa_temp</th>\n",
+       "      <th>rtu_003_pa_static_stpt_tn</th>\n",
+       "      <th>rtu_003_oa_flow_tn</th>\n",
+       "      <th>rtu_003_oadmpr_pct</th>\n",
+       "      <th>rtu_003_econ_stpt_tn</th>\n",
+       "      <th>rtu_003_ra_temp</th>\n",
+       "      <th>...</th>\n",
+       "      <th>rtu_003_rf_vfd_spd_fbk_tn</th>\n",
+       "      <th>rtu_003_fltrd_gnd_lvl_plenum_press_tn</th>\n",
+       "      <th>rtu_003_fltrd_lvl2_plenum_press_tn</th>\n",
+       "      <th>wifi_third_south</th>\n",
+       "      <th>wifi_fourth_south</th>\n",
+       "      <th>air_temp_set_1</th>\n",
+       "      <th>air_temp_set_2</th>\n",
+       "      <th>dew_point_temperature_set_1d</th>\n",
+       "      <th>relative_humidity_set_1</th>\n",
+       "      <th>solar_radiation_set_1</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>2018-01-01 00:00:00</td>\n",
+       "      <td>75.3</td>\n",
+       "      <td>65.0</td>\n",
+       "      <td>13558.539</td>\n",
+       "      <td>65.5</td>\n",
+       "      <td>0.6</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>34.6</td>\n",
+       "      <td>65.0</td>\n",
+       "      <td>67.9</td>\n",
+       "      <td>...</td>\n",
+       "      <td>49.9</td>\n",
+       "      <td>0.04</td>\n",
+       "      <td>0.05</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>11.64</td>\n",
+       "      <td>11.51</td>\n",
+       "      <td>8.1</td>\n",
+       "      <td>79.07</td>\n",
+       "      <td>86.7</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>2018-01-01 00:01:00</td>\n",
+       "      <td>75.3</td>\n",
+       "      <td>65.0</td>\n",
+       "      <td>13592.909</td>\n",
+       "      <td>65.6</td>\n",
+       "      <td>0.6</td>\n",
+       "      <td>5992.059572</td>\n",
+       "      <td>34.6</td>\n",
+       "      <td>65.0</td>\n",
+       "      <td>67.9</td>\n",
+       "      <td>...</td>\n",
+       "      <td>49.4</td>\n",
+       "      <td>0.04</td>\n",
+       "      <td>0.04</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>11.64</td>\n",
+       "      <td>11.51</td>\n",
+       "      <td>8.1</td>\n",
+       "      <td>79.07</td>\n",
+       "      <td>86.7</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "<p>2 rows × 23 columns</p>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "                  date  hp_hws_temp  rtu_003_sat_sp_tn  \\\n",
+       "0  2018-01-01 00:00:00         75.3               65.0   \n",
+       "1  2018-01-01 00:01:00         75.3               65.0   \n",
+       "\n",
+       "   rtu_003_fltrd_sa_flow_tn  rtu_003_sa_temp  rtu_003_pa_static_stpt_tn  \\\n",
+       "0                 13558.539             65.5                        0.6   \n",
+       "1                 13592.909             65.6                        0.6   \n",
+       "\n",
+       "   rtu_003_oa_flow_tn  rtu_003_oadmpr_pct  rtu_003_econ_stpt_tn  \\\n",
+       "0            0.000000                34.6                  65.0   \n",
+       "1         5992.059572                34.6                  65.0   \n",
+       "\n",
+       "   rtu_003_ra_temp  ...  rtu_003_rf_vfd_spd_fbk_tn  \\\n",
+       "0             67.9  ...                       49.9   \n",
+       "1             67.9  ...                       49.4   \n",
+       "\n",
+       "   rtu_003_fltrd_gnd_lvl_plenum_press_tn  rtu_003_fltrd_lvl2_plenum_press_tn  \\\n",
+       "0                                   0.04                                0.05   \n",
+       "1                                   0.04                                0.04   \n",
+       "\n",
+       "   wifi_third_south  wifi_fourth_south  air_temp_set_1  air_temp_set_2  \\\n",
+       "0               NaN                NaN           11.64           11.51   \n",
+       "1               NaN                NaN           11.64           11.51   \n",
+       "\n",
+       "   dew_point_temperature_set_1d  relative_humidity_set_1  \\\n",
+       "0                           8.1                    79.07   \n",
+       "1                           8.1                    79.07   \n",
+       "\n",
+       "   solar_radiation_set_1  \n",
+       "0                   86.7  \n",
+       "1                   86.7  \n",
+       "\n",
+       "[2 rows x 23 columns]"
+      ]
+     },
+     "execution_count": 59,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Keys used to pick model inputs. Note that `rtu` is rebound here from a single name (str) to a list of names.\n",
+    "rtu = [\"rtu_003\"]\n",
+    "env = [\"air_temp_set_1\",\"air_temp_set_2\",\"dew_point_temperature_set_1d\",\"relative_humidity_set_1\",\"solar_radiation_set_1\"]\n",
+    "wifi = [\"wifi_third_south\",\"wifi_fourth_south\"]\n",
+    "\n",
+    "# 'date' and 'hp_hws_temp' first, then every column whose name contains an RTU or wifi key\n",
+    "# (in the original column order), then the env columns.\n",
+    "keep_keys = rtu + wifi\n",
+    "energy_data = merged[\n",
+    "    [\"date\", \"hp_hws_temp\"]\n",
+    "    + [col for col in merged.columns if any(key in col for key in keep_keys)]\n",
+    "    + env\n",
+    "]\n",
+    "\n",
+    "# Drop, in a single pass, every column whose name contains one of these tags.\n",
+    "excluded_tags = ('Unnamed', 'co2', 'templogger')\n",
+    "df_filtered = energy_data[[col for col in energy_data.columns if not any(tag in col for tag in excluded_tags)]]\n",
+    "df_filtered.head(2)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 60,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Parse the timestamps into a new frame instead of writing a column into the column-subset frame\n",
+    "# built in the previous cell (that write can trigger pandas' chained-assignment warning).\n",
+    "df_filtered = df_filtered.assign(date=pd.to_datetime(df_filtered['date'], format = \"%Y-%m-%d %H:%M:%S\"))\n",
+    "\n",
+    "# Keep calendar days strictly after 2018-01-01 and strictly before 2021-01-01.\n",
+    "# Comparing midnight-normalised timestamps yields the same mask as comparing `.dt.date` objects,\n",
+    "# without materialising one Python date object per row.\n",
+    "row_day = df_filtered['date'].dt.normalize()\n",
+    "df_filtered = df_filtered[(row_day > pd.Timestamp(2018, 1, 1)) & (row_day < pd.Timestamp(2021, 1, 1))]\n",
+    "\n",
+    "# Fill gaps forward first, then backward so leading gaps are filled as well.\n",
+    "df_filtered = df_filtered.ffill().bfill()\n",
+    "if df_filtered.isna().any().any():\n",
+    "    print(\"There are NA values in the DataFrame columns.\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 61,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Final, ordered column set: the timestamp followed by the 15 signal columns used downstream.\n",
+    "model_columns = [\n",
+    "    'date',\n",
+    "    'hp_hws_temp',\n",
+    "    'rtu_003_sa_temp',\n",
+    "    'rtu_003_oadmpr_pct',\n",
+    "    'rtu_003_ra_temp',\n",
+    "    'rtu_003_oa_temp',\n",
+    "    'rtu_003_ma_temp',\n",
+    "    'rtu_003_sf_vfd_spd_fbk_tn',\n",
+    "    'rtu_003_rf_vfd_spd_fbk_tn',\n",
+    "    'wifi_third_south',\n",
+    "    'wifi_fourth_south',\n",
+    "    'air_temp_set_1',\n",
+    "    'air_temp_set_2',\n",
+    "    'dew_point_temperature_set_1d',\n",
+    "    'relative_humidity_set_1',\n",
+    "    'solar_radiation_set_1',\n",
+    "]\n",
+    "df_filtered = df_filtered.loc[:, model_columns]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 62,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "[]"
+      ]
+     },
+     "execution_count": 62,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "split_day = date(2020, 3, 11)\n",
+    "row_day = df_filtered.date.dt.date\n",
+    "\n",
+    "# NOTE(review): both comparisons are strict, so rows dated exactly on split_day land in neither set;\n",
+    "# confirm that this one-day gap is intended.\n",
+    "testdataset_df = df_filtered[row_day > split_day]\n",
+    "traindataset_df = df_filtered[row_day < split_day]\n",
+    "\n",
+    "\n",
+    "def _smooth(frame):\n",
+    "    \"\"\"Drop the 'date' column and return the rolling mean (window=10, step=5, min_periods=1) as a NumPy array.\"\"\"\n",
+    "    return frame.drop(columns=[\"date\"]).rolling(window = 10, step=5, min_periods=1).mean().values\n",
+    "\n",
+    "\n",
+    "testdataset = _smooth(testdataset_df)\n",
+    "traindataset = _smooth(traindataset_df)\n",
+    "\n",
+    "# Names of the training-frame columns that still contain NA values (displayed as the cell output).\n",
+    "columns_with_na = traindataset_df.columns[traindataset_df.isna().any()].tolist()\n",
+    "columns_with_na"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 63,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "(1157787, 909910)"
+      ]
+     },
+     "execution_count": 63,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Row counts of the train and test frames (these are the frames before the rolling mean is applied).\n",
+    "len(traindataset_df), len(testdataset_df)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 64,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Standardise as float32: the scaler is fitted on the training array only and then\n",
+    "# applied unchanged to the test array.\n",
+    "scaler = StandardScaler()\n",
+    "traindataset = scaler.fit_transform(traindataset.astype('float32'))\n",
+    "testdataset = scaler.transform(testdataset.astype('float32'))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 65,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Short aliases for the scaled train/test arrays; no copy is made.\n",
+    "train,test = traindataset,testdataset\n",
+    "\n",
+    "def create_dataset(dataset, time_step, n_features=None, n_targets=8):\n",
+    "    \"\"\"Slice a 2-D series into sliding input windows and next-row targets.\n",
+    "\n",
+    "    Args:\n",
+    "        dataset: 2-D array-like of shape (n_rows, n_columns).\n",
+    "        time_step: number of consecutive rows in each input window.\n",
+    "        n_features: number of leading columns used as inputs. None (default) uses\n",
+    "            every column of `dataset` rather than a hard-coded 15.\n",
+    "        n_targets: number of leading columns taken from the row that follows each\n",
+    "            window as the target (default 8).\n",
+    "\n",
+    "    Returns:\n",
+    "        (X, Y) where X has shape (n_samples, time_step, n_features) and Y has shape\n",
+    "        (n_samples, n_targets), with n_samples = max(n_rows - time_step - 1, 0).\n",
+    "\n",
+    "    Raises:\n",
+    "        ValueError: if n_features or n_targets exceeds the number of columns.\n",
+    "    \"\"\"\n",
+    "    dataset = np.asarray(dataset)\n",
+    "    n_rows, n_columns = dataset.shape\n",
+    "    if n_features is None:\n",
+    "        n_features = n_columns\n",
+    "    needed = max(n_features, n_targets)\n",
+    "    if needed > n_columns:\n",
+    "        raise ValueError(f'dataset has {n_columns} columns but {needed} are required')\n",
+    "\n",
+    "    # Same sample count as the original loop, range(len(dataset) - time_step - 1):\n",
+    "    # the last row is never used as a target. Clamped at 0 so short inputs give empty arrays.\n",
+    "    n_samples = max(n_rows - time_step - 1, 0)\n",
+    "\n",
+    "    # Row indices of every window: window i covers rows i .. i + time_step - 1.\n",
+    "    window_rows = np.arange(n_samples)[:, None] + np.arange(time_step)[None, :]\n",
+    "    x = np.ascontiguousarray(dataset[window_rows, :n_features])\n",
+    "\n",
+    "    # Target for window i is row i + time_step, restricted to the first n_targets columns.\n",
+    "    Y = dataset[time_step:time_step + n_samples, :n_targets].copy()\n",
+    "    return x, Y\n",
+    "\n",
+    "time_step = 30\n",
+    "X_train, y_train = create_dataset(train, time_step)\n",
+    "X_test, y_test = create_dataset(test, time_step)\n",
+    "\n",
+    "# Three stacked LSTM layers followed by a dense head with one unit per predicted column.\n",
+    "model = Sequential([\n",
+    "    LSTM(units=50, return_sequences=True, input_shape=(X_train.shape[1], X_train.shape[2])),\n",
+    "    LSTM(units=50, return_sequences=True),\n",
+    "    LSTM(units=30),\n",
+    "    Dense(units=8),\n",
+    "])\n",
+    "model.compile(optimizer='adam', loss='mean_squared_error')\n",
+    "\n",
+    "# Only the weights with the lowest validation loss seen so far are written to checkpoint_path.\n",
+    "checkpoint_path = \"lstm_smooth_01.tf\"\n",
+    "checkpoint_callback = ModelCheckpoint(\n",
+    "    filepath=checkpoint_path, monitor='val_loss', verbose=1, save_best_only=True, mode='min'\n",
+    ")\n",
+    "\n",
+    "# NOTE(review): the test split doubles as validation data, so checkpoint selection sees the test set;\n",
+    "# confirm this is acceptable or carve out a separate validation split.\n",
+    "model.fit(\n",
+    "    X_train, y_train,\n",
+    "    validation_data=(X_test, y_test),\n",
+    "    epochs=5, batch_size=64, verbose=1,\n",
+    "    callbacks=[checkpoint_callback],\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "<tensorflow.python.checkpoint.checkpoint.CheckpointLoadStatus at 0x1b2861bd190>"
+      ]
+     },
+     "execution_count": 11,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Reload the weights stored at checkpoint_path (the file written by the ModelCheckpoint callback during training).\n",
+    "model.load_weights(checkpoint_path)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "5686/5686 [==============================] - 27s 5ms/step\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Run the model on every test window; the dense head yields 8 predicted values per window.\n",
+    "test_predict1 = model.predict(X_test)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%matplotlib qt\n",
+    "# Column of the prediction / target arrays to inspect.\n",
+    "var = 3\n",
+    "# Absolute prediction error above which a sample is flagged (same units as y_test).\n",
+    "anomaly_threshold = 0.38\n",
+    "\n",
+    "abs_error = np.abs(test_predict1[:, var] - y_test[:, var])\n",
+    "# flatnonzero gives a plain 1-D index array. np.where(cond) returns a 1-tuple, and using\n",
+    "# that tuple as x-values and as an index only works because scatter flattens (1, N) input.\n",
+    "anomalies = np.flatnonzero(abs_error > anomaly_threshold)\n",
+    "\n",
+    "plt.plot(y_test[:, var], label='Original Testing Data', color='blue')\n",
+    "plt.plot(test_predict1[:, var], label='Predicted Testing Data', color='red', alpha=0.8)\n",
+    "# Label the markers so they are listed in the legend alongside the two curves.\n",
+    "plt.scatter(anomalies, test_predict1[anomalies, var], color='black', marker='o', s=100,\n",
+    "            label='Anomalies')\n",
+    "\n",
+    "plt.title('Testing Data - Predicted vs Actual')\n",
+    "plt.xlabel('Time')\n",
+    "plt.ylabel('Value')\n",
+    "plt.legend()\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from sklearn.mixture import GaussianMixture\n",
+    "import numpy as np\n",
+    "import matplotlib.pyplot as plt\n",
+    "from sklearn.decomposition import PCA\n",
+    "\n",
+    "# Seed NumPy's global RNG; neither estimator below is given its own random_state.\n",
+    "np.random.seed(0)\n",
+    "\n",
+    "# Prediction residuals reduced to their first two principal components.\n",
+    "pca = PCA(n_components=2)\n",
+    "X = pca.fit_transform(test_predict1 - y_test)\n",
+    "\n",
+    "# Fit a two-component Gaussian mixture on the 2-D projection, then assign each\n",
+    "# sample to a component.\n",
+    "gmm = GaussianMixture(n_components=2)\n",
+    "labels = gmm.fit(X).predict(X)\n",
+    "\n",
+    "# Scatter of the projected residuals, coloured by mixture component.\n",
+    "plt.scatter(X[:, 0], X[:, 1], c=labels, cmap='viridis', alpha=0.5)\n",
+    "plt.xlabel('Feature 1')\n",
+    "plt.ylabel('Feature 2')\n",
+    "plt.title('GMM Clustering')\n",
+    "plt.show()\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from sklearn.cluster import KMeans\n",
+    "import numpy as np\n",
+    "import matplotlib.pyplot as plt\n",
+    "\n",
+    "# Seed NumPy's global RNG; KMeans and PCA below are given no random_state.\n",
+    "np.random.seed(0)\n",
+    "\n",
+    "# Map the residuals back through the scaler for the first 8 features.\n",
+    "# StandardScaler is inverted with x * scale_ + mean_, where scale_ is the standard\n",
+    "# deviation. var_ is the variance (scale_ squared) and would distort the relative\n",
+    "# weight of the features seen by KMeans and PCA.\n",
+    "# NOTE(review): assumes `scaler` is the StandardScaler used to standardize y_test - confirm.\n",
+    "# The mean_ term is a constant offset, so it does not change the clusters or the\n",
+    "# PCA projection.\n",
+    "X = (test_predict1 - y_test) * scaler.scale_[0:8] + scaler.mean_[0:8]\n",
+    "\n",
+    "# Number of clusters.\n",
+    "k = 6\n",
+    "\n",
+    "# Cluster in the full 8-dimensional residual space.\n",
+    "kmeans = KMeans(n_clusters=k)\n",
+    "kmeans.fit(X)\n",
+    "\n",
+    "# PCA is used for display only: samples and centres share one 2-D projection.\n",
+    "pca = PCA(n_components=2)\n",
+    "X = pca.fit_transform(X)\n",
+    "\n",
+    "# Getting the cluster centers and labels\n",
+    "centroids = kmeans.cluster_centers_\n",
+    "centroids = pca.transform(centroids)\n",
+    "labels = kmeans.labels_\n",
+    "\n",
+    "# Plotting the data points and cluster centers\n",
+    "plt.scatter(X[:, 0], X[:, 1], c=labels, cmap='viridis', alpha=0.5)\n",
+    "plt.scatter(centroids[:, 0], centroids[:, 1], marker='x', c='red', s=200, linewidths=2)\n",
+    "plt.title('KMeans Clustering')\n",
+    "plt.xlabel('Feature 1')\n",
+    "plt.ylabel('Feature 2')\n",
+    "plt.show()\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Number of consecutive residual rows summarised per window.\n",
+    "k = 60\n",
+    "X = test_predict1 - y_test\n",
+    "\n",
+    "# Build one feature row per non-overlapping window: per-column mean, std, max, min\n",
+    "# and inter-quartile range, concatenated in that order.\n",
+    "# Local names avoid rebinding the builtins max and min for the rest of the notebook.\n",
+    "window_features = []\n",
+    "for start in range(0, len(X), k):  # stride equals the window length\n",
+    "    window = X[start:start + k]  # the final window is shorter when len(X) % k != 0\n",
+    "    q75, q25 = np.percentile(window, [75, 25], axis=0)\n",
+    "    window_features.append(np.concatenate([\n",
+    "        window.mean(axis=0),\n",
+    "        window.std(axis=0),\n",
+    "        window.max(axis=0),\n",
+    "        window.min(axis=0),\n",
+    "        q75 - q25,\n",
+    "    ]))\n",
+    "\n",
+    "# Shape: (number of windows, 5 * number of residual columns).\n",
+    "processed_data = np.stack(window_features, axis=0)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Cluster the per-window summary features in their original dimensionality.\n",
+    "kmeans = KMeans(n_clusters=3, algorithm='elkan', max_iter=1000, n_init=5).fit(processed_data)\n",
+    "labels = kmeans.labels_\n",
+    "\n",
+    "# PCA is fitted after clustering and used for display only: the samples and the\n",
+    "# fitted centres go through the same 2-D projection.\n",
+    "pca = PCA(n_components=2)\n",
+    "X = pca.fit_transform(processed_data)\n",
+    "centroids = pca.transform(kmeans.cluster_centers_)\n",
+    "\n",
+    "# Samples coloured by cluster, with the centres drawn as red crosses.\n",
+    "plt.scatter(X[:, 0], X[:, 1], c=labels, cmap='viridis', alpha=0.5)\n",
+    "plt.scatter(centroids[:, 0], centroids[:, 1], marker='x', c='red', s=200, linewidths=2)\n",
+    "plt.xlabel('Feature 1')\n",
+    "plt.ylabel('Feature 2')\n",
+    "plt.title('KMeans Clustering')\n",
+    "plt.show()\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from sklearn.mixture import GaussianMixture\n",
+    "import numpy as np\n",
+    "import matplotlib.pyplot as plt\n",
+    "from sklearn.decomposition import PCA\n",
+    "\n",
+    "# Seed NumPy's global RNG; neither estimator below is given its own random_state.\n",
+    "np.random.seed(0)\n",
+    "\n",
+    "# Three-component Gaussian mixture fitted on the per-window features, with\n",
+    "# k-means++ initialisation. Labels are assigned before any projection.\n",
+    "gmm = GaussianMixture(n_components=3, init_params='k-means++')\n",
+    "labels = gmm.fit(processed_data).predict(processed_data)\n",
+    "\n",
+    "# Reduce to two principal components for plotting only.\n",
+    "pca = PCA(n_components=2)\n",
+    "X = pca.fit_transform(processed_data)\n",
+    "\n",
+    "# Scatter of the projected windows, coloured by mixture component.\n",
+    "plt.scatter(X[:, 0], X[:, 1], c=labels, cmap='viridis', alpha=0.5)\n",
+    "plt.xlabel('Feature 1')\n",
+    "plt.ylabel('Feature 2')\n",
+    "plt.title('GMM Clustering')\n",
+    "plt.show()\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "(181982, 15)"
+      ]
+     },
+     "execution_count": 26,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "testdataset.shape"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "(181951, 8)"
+      ]
+     },
+     "execution_count": 28,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "test_predict1.shape"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "array([108.04575472,  65.85715493,  47.79928153,  71.09534962,\n",
+       "        56.33539828,  67.06136834,  73.87258151,  51.46057509,\n",
+       "        32.91318188,  28.12291834,  13.58804695,  13.24250204,\n",
+       "         6.3366788 ,  66.41283778, 176.8329019 ])"
+      ]
+     },
+     "execution_count": 30,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "scaler.mean_"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "array([2.23555351e+02, 4.88454343e+00, 6.76207201e+02, 3.86856317e+00,\n",
+       "       6.72235289e+01, 7.04553897e+00, 2.03829988e+02, 1.46671335e+02,\n",
+       "       1.53229114e+02, 1.01090815e+02, 2.37177860e+01, 1.97707428e+01,\n",
+       "       2.76565556e+01, 4.60824153e+02, 6.83930692e+04])"
+      ]
+     },
+     "execution_count": 31,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "scaler.var_"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "array([[109.83607997,  65.7232677 , 102.42839746, ...,  67.14066092,\n",
+       "         90.56450819,  66.22438437],\n",
+       "       [100.28441846,  66.40819637, 123.52383974, ...,  68.39884677,\n",
+       "         71.74945776,  60.3140524 ],\n",
+       "       [100.83776313,  65.46071865, -55.82973994, ...,  66.55045523,\n",
+       "         64.49064254,  66.48224704],\n",
+       "       ...,\n",
+       "       [ 70.86386298,  65.98717901, 118.99624806, ...,  67.35991191,\n",
+       "         43.36234531,  29.05084393],\n",
+       "       [ 71.26526339,  65.9891675 , 118.33246354, ...,  67.25223838,\n",
+       "         50.88386299,  46.49937637],\n",
+       "       [ 71.28495765,  65.85019898, 114.35237621, ...,  67.29575831,\n",
+       "         40.09704965,  20.1328048 ]])"
+      ]
+     },
+     "execution_count": 34,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from sklearn.cluster import KMeans\n",
+    "import numpy as np\n",
+    "import matplotlib.pyplot as plt\n",
+    "\n",
+    "# Seed NumPy's global RNG; KMeans and PCA below are given no random_state.\n",
+    "np.random.seed(0)\n",
+    "\n",
+    "# Map the residuals back through the scaler for the first 8 features.\n",
+    "# StandardScaler is inverted with x * scale_ + mean_, where scale_ is the standard\n",
+    "# deviation. var_ is the variance (scale_ squared) and would distort the relative\n",
+    "# weight of the features seen by KMeans and PCA.\n",
+    "# NOTE(review): assumes `scaler` is the StandardScaler used to standardize y_test - confirm.\n",
+    "# The mean_ term is a constant offset, so it does not change the clusters or the\n",
+    "# PCA projection.\n",
+    "X = (test_predict1 - y_test) * scaler.scale_[0:8] + scaler.mean_[0:8]\n",
+    "\n",
+    "# Number of clusters.\n",
+    "k = 6\n",
+    "\n",
+    "# Cluster in the full 8-dimensional residual space.\n",
+    "kmeans = KMeans(n_clusters=k)\n",
+    "kmeans.fit(X)\n",
+    "\n",
+    "# PCA is used for display only: samples and centres share one 2-D projection.\n",
+    "pca = PCA(n_components=2)\n",
+    "X = pca.fit_transform(X)\n",
+    "\n",
+    "# Getting the cluster centers and labels\n",
+    "centroids = kmeans.cluster_centers_\n",
+    "centroids = pca.transform(centroids)\n",
+    "labels = kmeans.labels_\n",
+    "\n",
+    "# Plotting the data points and cluster centers\n",
+    "plt.scatter(X[:, 0], X[:, 1], c=labels, cmap='viridis', alpha=0.5)\n",
+    "plt.scatter(centroids[:, 0], centroids[:, 1], marker='x', c='red', s=200, linewidths=2)\n",
+    "plt.title('KMeans Clustering')\n",
+    "plt.xlabel('Feature 1')\n",
+    "plt.ylabel('Feature 2')\n",
+    "plt.show()\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "tensorflow",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.8"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}