levimohle committed
Commit 59b2cc9
1 parent: 3beb62e

Added function to fill data gaps

EnergyLSTM/EDA_lstm_energy.ipynb CHANGED
@@ -2,36 +2,24 @@
   "cells": [
   {
   "cell_type": "code",
- "execution_count": 1,
+ "execution_count": null,
   "metadata": {},
- "outputs": [
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "c:\\Users\\levim\\anaconda3\\envs\\experiments\\lib\\site-packages\\scipy\\__init__.py:146: UserWarning: A NumPy version >=1.16.5 and <1.23.0 is required for this version of SciPy (detected version 1.24.3\n",
- " warnings.warn(f\"A NumPy version >={np_minversion} and <{np_maxversion}\"\n"
- ]
- }
- ],
+ "outputs": [],
   "source": [
   "import pandas as pd \n",
   "from datetime import datetime \n",
- "from datetime import date\n",
+ "from datetime import timedelta\n",
   "import matplotlib.pyplot as plt\n",
   "# import seaborn as sns\n",
   "import numpy as np\n",
   "import pandas as pd\n",
- "from keras.models import Sequential\n",
- "from keras.layers import LSTM, Dense\n",
- "from sklearn.model_selection import train_test_split\n",
- "from sklearn.preprocessing import MinMaxScaler,StandardScaler\n",
- "from keras.callbacks import ModelCheckpoint\n",
+ "from statsmodels.tsa.holtwinters import ExponentialSmoothing\n",
   "\n",
   "dataPATH = r\"C:\\Users\\levim\\OneDrive\\Documents\\MastersAI_ES\\TeamProject-5ARIP10\\smart-buildings\\Data\"\n",
   "\n",
   "### Load ALL data ###\n",
- "all_data = pd.read_csv(dataPATH + r\"\\long_merge.csv\")"
+ "# all_data = pd.read_csv(dataPATH + r\"\\long_merge.csv\")\n",
+ "all_data = pd.read_csv(dataPATH + r\"\\extended_energy_data.csv\")"
   ]
   },
   {
@@ -43,22 +31,9 @@
   },
   {
   "cell_type": "code",
- "execution_count": 20,
+ "execution_count": null,
   "metadata": {},
- "outputs": [
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "C:\\Users\\levim\\AppData\\Local\\Temp\\ipykernel_27084\\3547628995.py:5: SettingWithCopyWarning: \n",
- "A value is trying to be set on a copy of a slice from a DataFrame.\n",
- "Try using .loc[row_indexer,col_indexer] = value instead\n",
- "\n",
- "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
- " extended_energy_data['date'] = pd.to_datetime(extended_energy_data['date'])\n"
- ]
- }
- ],
+ "outputs": [],
   "source": [
   "# Prepare energy data set with extended features\n",
   "feature_list = ['date', 'hvac_N', 'hvac_S', 'air_temp_set_1', 'solar_radiation_set_1']\n",
@@ -75,7 +50,7 @@
   },
   {
   "cell_type": "code",
- "execution_count": 27,
+ "execution_count": null,
   "metadata": {
   "vscode": {
   "languageId": "ruby"
@@ -84,43 +59,255 @@
   "outputs": [],
   "source": [
   "# Smooth the hvac columns with a rolling moving-average window\n",
- "window_size = 12*4 # Half a day\n",
+ "window_size = 4*4 # 4 hours\n",
   "eed_15m_avg = eed_15m.copy()\n",
   "eed_15m_avg['hvac_N'] = eed_15m['hvac_N'].rolling(window=window_size).mean()\n",
- "eed_15m_avg['hvac_S'] = eed_15m['hvac_S'].rolling(window=window_size).mean()"
+ "eed_15m_avg['hvac_S'] = eed_15m['hvac_S'].rolling(window=window_size).mean()\n",
+ "\n",
+ "window_size = 4 # 4 hours\n",
+ "eed_1h_avg = eed_1h.copy()\n",
+ "eed_1h_avg['hvac_N'] = eed_1h['hvac_N'].rolling(window=window_size).mean()\n",
+ "eed_1h_avg['hvac_S'] = eed_1h['hvac_S'].rolling(window=window_size).mean()"
   ]
   },
   {
   "cell_type": "code",
- "execution_count": 26,
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%matplotlib qt\n",
+ "\n",
+ "start_date = '2018-06-02'\n",
+ "end_date = '2018-06-08'\n",
+ "\n",
+ "plt.plot(eed_15m['hvac_N'].loc[start_date:end_date])\n",
+ "plt.plot(eed_15m_avg['hvac_N'].loc[start_date:end_date])\n",
+ "plt.plot(eed_1h_avg['hvac_N'].loc[start_date:end_date])\n",
+ "plt.xticks(rotation=45)\n",
+ "plt.show()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%matplotlib qt\n",
+ "\n",
+ "plt.figure(figsize=(20,10))\n",
+ "plt.plot(eed_1h['hvac_S'])\n",
+ "plt.show()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "eed_1h[eed_1h['hvac_S'].isna()]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Filling data gaps"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def fillgap(firstTS, secondTS, seasonal_periods):\n",
+ "    \n",
+ "    #PREPARATION\n",
+ "    one = timedelta(hours=1)\n",
+ "    secondTSr = secondTS[::-1].copy()\n",
+ "    firstTSr = firstTS[::-1].copy()\n",
+ "    indexr = pd.date_range(start=firstTS.index[0], end=secondTS.index[-1], freq='h')\n",
+ "    firstTSr.index = indexr[-len(firstTSr):]\n",
+ "    secondTSr.index = indexr[:len(secondTSr)]\n",
+ "    \n",
+ "    #FORWARD\n",
+ "    es = ExponentialSmoothing(firstTS, seasonal_periods=seasonal_periods, seasonal='add').fit()\n",
+ "    forwardPrediction = es.predict(start=firstTS.index[-1]+one, end=secondTS.index[0]-one)\n",
+ "    \n",
+ "    #BACKWARD\n",
+ "    es = ExponentialSmoothing(secondTSr, seasonal_periods=seasonal_periods, seasonal='add').fit()\n",
+ "    backwardPrediction = es.predict(start=secondTSr.index[-1]+one, end=firstTSr.index[0]-one)\n",
+ "    \n",
+ "    #INTERPOLATION\n",
+ "    l = len(forwardPrediction)\n",
+ "    interpolation = pd.Series([(backwardPrediction[i] * i + forwardPrediction[i] * (l - i)) / l for i in range(l)], index=forwardPrediction.index.copy())\n",
+ "    \n",
+ "    return interpolation"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Function to split the data into multiple DataFrames based on the gaps\n",
+ "def split_dfs(data):\n",
+ "\n",
+ "    # Prepare the DataFrame\n",
+ "    df = data.copy()\n",
+ "    df = df.reset_index()\n",
+ "    df = df.dropna()\n",
+ "    \n",
+ "    # Set the maximum allowable gap (e.g., 1 hour)\n",
+ "    max_gap = pd.Timedelta(hours=1)\n",
+ "\n",
+ "    # Calculate the differences between consecutive timestamps\n",
+ "    time_diff = df['date'].diff()\n",
+ "\n",
+ "    # Identify gaps larger than the maximum allowable gap\n",
+ "    gaps = time_diff > max_gap\n",
+ "\n",
+ "    # Create a new column to identify different groups\n",
+ "    df['group'] = gaps.cumsum()\n",
+ "\n",
+ "    df.set_index('date', inplace=True)\n",
+ "\n",
+ "    # Split the DataFrame into a list of DataFrames based on the groups\n",
+ "    dfs = [group for _, group in df.groupby('group')]\n",
+ "\n",
+ "    return dfs"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "seasonal_periods = 24\n",
+ "dfs = split_dfs(eed_1h[['hvac_N']])\n",
+ "\n",
+ "# Interpolate the gaps between the DataFrames\n",
+ "ip_df = pd.DataFrame()\n",
+ "for ii in range(len(dfs)-1):\n",
+ "    if (len(dfs[ii]) > 2*seasonal_periods+10) and (len(dfs[ii+1]) > 2*seasonal_periods+10):\n",
+ "        interpolation = fillgap(dfs[ii]['hvac_N'], dfs[ii+1]['hvac_N'], seasonal_periods)\n",
+ "        ip_df = pd.concat([ip_df, interpolation])\n",
+ "    else:\n",
+ "        continue"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
   "metadata": {},
   "outputs": [
   {
- "data": {
- "text/plain": [
- "(array([17591., 17652., 17713., 17775., 17836., 17897.]),\n",
- " [Text(17591.0, 0, '2018-03'),\n",
- " Text(17652.0, 0, '2018-05'),\n",
- " Text(17713.0, 0, '2018-07'),\n",
- " Text(17775.0, 0, '2018-09'),\n",
- " Text(17836.0, 0, '2018-11'),\n",
- " Text(17897.0, 0, '2019-01')])"
- ]
- },
- "execution_count": 26,
- "metadata": {},
- "output_type": "execute_result"
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "c:\\Users\\levim\\anaconda3\\envs\\experiments\\lib\\site-packages\\statsmodels\\tsa\\base\\tsa_model.py:473: ValueWarning: No frequency information was provided, so inferred frequency H will be used.\n",
+ " self._init_dates(dates, freq)\n",
+ "c:\\Users\\levim\\anaconda3\\envs\\experiments\\lib\\site-packages\\statsmodels\\tsa\\base\\tsa_model.py:473: ValueWarning: No frequency information was provided, so inferred frequency H will be used.\n",
+ " self._init_dates(dates, freq)\n",
+ "c:\\Users\\levim\\anaconda3\\envs\\experiments\\lib\\site-packages\\statsmodels\\tsa\\base\\tsa_model.py:473: ValueWarning: No frequency information was provided, so inferred frequency H will be used.\n",
+ " self._init_dates(dates, freq)\n",
+ "c:\\Users\\levim\\anaconda3\\envs\\experiments\\lib\\site-packages\\statsmodels\\tsa\\base\\tsa_model.py:473: ValueWarning: No frequency information was provided, so inferred frequency H will be used.\n",
+ " self._init_dates(dates, freq)\n",
+ "c:\\Users\\levim\\anaconda3\\envs\\experiments\\lib\\site-packages\\statsmodels\\tsa\\base\\tsa_model.py:473: ValueWarning: No frequency information was provided, so inferred frequency H will be used.\n",
+ " self._init_dates(dates, freq)\n",
+ "c:\\Users\\levim\\anaconda3\\envs\\experiments\\lib\\site-packages\\statsmodels\\tsa\\base\\tsa_model.py:473: ValueWarning: No frequency information was provided, so inferred frequency H will be used.\n",
+ " self._init_dates(dates, freq)\n",
+ "c:\\Users\\levim\\anaconda3\\envs\\experiments\\lib\\site-packages\\statsmodels\\tsa\\base\\tsa_model.py:473: ValueWarning: No frequency information was provided, so inferred frequency H will be used.\n",
+ " self._init_dates(dates, freq)\n",
+ "c:\\Users\\levim\\anaconda3\\envs\\experiments\\lib\\site-packages\\statsmodels\\tsa\\base\\tsa_model.py:473: ValueWarning: No frequency information was provided, so inferred frequency H will be used.\n",
+ " self._init_dates(dates, freq)\n",
+ "c:\\Users\\levim\\anaconda3\\envs\\experiments\\lib\\site-packages\\statsmodels\\tsa\\base\\tsa_model.py:473: ValueWarning: No frequency information was provided, so inferred frequency H will be used.\n",
+ " self._init_dates(dates, freq)\n",
+ "c:\\Users\\levim\\anaconda3\\envs\\experiments\\lib\\site-packages\\statsmodels\\tsa\\base\\tsa_model.py:473: ValueWarning: No frequency information was provided, so inferred frequency H will be used.\n",
+ " self._init_dates(dates, freq)\n",
+ "c:\\Users\\levim\\anaconda3\\envs\\experiments\\lib\\site-packages\\statsmodels\\tsa\\base\\tsa_model.py:473: ValueWarning: No frequency information was provided, so inferred frequency H will be used.\n",
+ " self._init_dates(dates, freq)\n",
+ "c:\\Users\\levim\\anaconda3\\envs\\experiments\\lib\\site-packages\\statsmodels\\tsa\\base\\tsa_model.py:473: ValueWarning: No frequency information was provided, so inferred frequency H will be used.\n",
+ " self._init_dates(dates, freq)\n",
+ "c:\\Users\\levim\\anaconda3\\envs\\experiments\\lib\\site-packages\\statsmodels\\tsa\\base\\tsa_model.py:473: ValueWarning: No frequency information was provided, so inferred frequency H will be used.\n",
+ " self._init_dates(dates, freq)\n",
+ "c:\\Users\\levim\\anaconda3\\envs\\experiments\\lib\\site-packages\\statsmodels\\tsa\\base\\tsa_model.py:473: ValueWarning: No frequency information was provided, so inferred frequency H will be used.\n",
+ " self._init_dates(dates, freq)\n",
+ "c:\\Users\\levim\\anaconda3\\envs\\experiments\\lib\\site-packages\\statsmodels\\tsa\\base\\tsa_model.py:473: ValueWarning: No frequency information was provided, so inferred frequency H will be used.\n",
+ " self._init_dates(dates, freq)\n",
+ "c:\\Users\\levim\\anaconda3\\envs\\experiments\\lib\\site-packages\\statsmodels\\tsa\\base\\tsa_model.py:473: ValueWarning: No frequency information was provided, so inferred frequency H will be used.\n",
+ " self._init_dates(dates, freq)\n",
+ "c:\\Users\\levim\\anaconda3\\envs\\experiments\\lib\\site-packages\\statsmodels\\tsa\\base\\tsa_model.py:473: ValueWarning: No frequency information was provided, so inferred frequency H will be used.\n",
+ " self._init_dates(dates, freq)\n",
+ "c:\\Users\\levim\\anaconda3\\envs\\experiments\\lib\\site-packages\\statsmodels\\tsa\\base\\tsa_model.py:473: ValueWarning: No frequency information was provided, so inferred frequency H will be used.\n",
+ " self._init_dates(dates, freq)\n",
+ "c:\\Users\\levim\\anaconda3\\envs\\experiments\\lib\\site-packages\\statsmodels\\tsa\\base\\tsa_model.py:473: ValueWarning: No frequency information was provided, so inferred frequency H will be used.\n",
+ " self._init_dates(dates, freq)\n",
+ "c:\\Users\\levim\\anaconda3\\envs\\experiments\\lib\\site-packages\\statsmodels\\tsa\\base\\tsa_model.py:473: ValueWarning: No frequency information was provided, so inferred frequency H will be used.\n",
+ " self._init_dates(dates, freq)\n",
+ "c:\\Users\\levim\\anaconda3\\envs\\experiments\\lib\\site-packages\\statsmodels\\tsa\\holtwinters\\model.py:917: ConvergenceWarning: Optimization failed to converge. Check mle_retvals.\n",
+ " warnings.warn(\n",
+ "c:\\Users\\levim\\anaconda3\\envs\\experiments\\lib\\site-packages\\statsmodels\\tsa\\base\\tsa_model.py:473: ValueWarning: No frequency information was provided, so inferred frequency H will be used.\n",
+ " self._init_dates(dates, freq)\n",
+ "c:\\Users\\levim\\anaconda3\\envs\\experiments\\lib\\site-packages\\statsmodels\\tsa\\holtwinters\\model.py:917: ConvergenceWarning: Optimization failed to converge. Check mle_retvals.\n",
+ " warnings.warn(\n",
+ "c:\\Users\\levim\\anaconda3\\envs\\experiments\\lib\\site-packages\\statsmodels\\tsa\\base\\tsa_model.py:473: ValueWarning: No frequency information was provided, so inferred frequency H will be used.\n",
+ " self._init_dates(dates, freq)\n",
+ "c:\\Users\\levim\\anaconda3\\envs\\experiments\\lib\\site-packages\\statsmodels\\tsa\\holtwinters\\model.py:917: ConvergenceWarning: Optimization failed to converge. Check mle_retvals.\n",
+ " warnings.warn(\n",
+ "c:\\Users\\levim\\anaconda3\\envs\\experiments\\lib\\site-packages\\statsmodels\\tsa\\base\\tsa_model.py:473: ValueWarning: No frequency information was provided, so inferred frequency H will be used.\n",
+ " self._init_dates(dates, freq)\n",
+ "c:\\Users\\levim\\anaconda3\\envs\\experiments\\lib\\site-packages\\statsmodels\\tsa\\holtwinters\\model.py:917: ConvergenceWarning: Optimization failed to converge. Check mle_retvals.\n",
+ " warnings.warn(\n"
+ ]
   }
   ],
   "source": [
- "%matplotlib qt\n",
+ "seasonal_periods = 24\n",
+ "dfs = split_dfs(eed_1h[['hvac_N']])\n",
   "\n",
- "start_date = '2018-02-02'\n",
- "end_date = '2019-02-03'\n",
+ "# Interpolate the gaps between the DataFrames\n",
+ "ip_df = pd.DataFrame()\n",
+ "for ii in range(len(dfs)-1):\n",
+ "    seasonal_periods = max(min([len(dfs[ii]), len(dfs[ii+1])]) // 2 - 10, 2)\n",
+ "    \n",
+ "    if seasonal_periods > 2*24*7 + 10: # Using more than 1 week of seasonal patterns is not necessary\n",
+ "        seasonal_periods = 24*7\n",
+ "        interpolation = fillgap(dfs[ii]['hvac_N'], dfs[ii+1]['hvac_N'], seasonal_periods)\n",
+ "    else:\n",
+ "        interpolation = fillgap(dfs[ii]['hvac_N'], dfs[ii+1]['hvac_N'], seasonal_periods)\n",
   "\n",
- "plt.plot(eed_15m['hvac_N'].loc[start_date:end_date])\n",
- "plt.plot(eed_15m['moving_average'].loc[start_date:end_date])\n",
- "plt.xticks(rotation=45)"
+ "    ip_df = pd.concat([ip_df, interpolation])\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%matplotlib qt\n",
+ "plt.plot(eed_1h['hvac_N'])\n",
+ "plt.plot(ip_df)\n",
+ "\n",
+ "plt.show()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "seasonal_periods = 2\n",
+ "for ii in range(len(dfs)-1):\n",
+ "    interpolation = fillgap(dfs[ii]['hvac_N'], dfs[ii+1]['hvac_N'], seasonal_periods)\n",
+ "    ip_df = pd.concat([ip_df, interpolation])"
   ]
   },
   {
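The centerpiece of the commit is `fillgap`, which bridges a hole in an hourly series by fitting an additive Holt-Winters model on each side of the gap, forecasting forward from the left segment and backward from the reversed right segment, and blending the two forecasts linearly. A minimal self-contained sketch of the same idea on synthetic data (the series, dates, and variable names below are illustrative, not from the repository):

```python
import numpy as np
import pandas as pd
from statsmodels.tsa.holtwinters import ExponentialSmoothing

# Synthetic hourly series with a daily cycle; two days are treated as the gap
idx = pd.date_range("2018-06-01", periods=24 * 14, freq="h")
rng = np.random.default_rng(0)
series = pd.Series(10 + 5 * np.sin(2 * np.pi * idx.hour / 24)
                   + rng.normal(0, 0.5, len(idx)), index=idx)
gap = idx[(idx >= "2018-06-06") & (idx < "2018-06-08")]
before = series[: gap[0] - pd.Timedelta(hours=1)]
after = series[gap[-1] + pd.Timedelta(hours=1) :]

# Forward: forecast across the gap from the left segment
fwd = (ExponentialSmoothing(before, seasonal_periods=24, seasonal="add")
       .fit().predict(start=gap[0], end=gap[-1]))

# Backward: reverse the right segment, give it an increasing hourly index,
# forecast in reversed time, then flip the forecast back into real time
rev = pd.Series(after.values[::-1],
                index=pd.date_range(start=gap[0], periods=len(after), freq="h"))
bwd = (ExponentialSmoothing(rev, seasonal_periods=24, seasonal="add")
       .fit().forecast(len(gap)).values[::-1])

# Linear blend, mirroring fillgap's (backward*i + forward*(l-i))/l weighting:
# trust the forward forecast near the left edge, the backward one near the right
w = np.arange(len(gap)) / len(gap)
filled = pd.Series((1 - w) * fwd.values + w * bwd, index=gap)
```

The blend keeps the fill continuous at both edges of the gap, which a single forward forecast cannot guarantee at the right edge.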
 
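`split_dfs` supplies the segments that `fillgap` stitches together: it drops NaNs, then starts a new group wherever consecutive timestamps are more than an hour apart. A quick behavioural check on a toy frame (values are made up; only the `date` index name matters to the function, which is assumed defined as in the diff above):

```python
import pandas as pd

# Five hourly rows with a three-hour hole after the third one
dates = pd.to_datetime(["2018-06-01 00:00", "2018-06-01 01:00", "2018-06-01 02:00",
                        "2018-06-01 06:00", "2018-06-01 07:00"])
toy = pd.DataFrame({"hvac_N": [1.0, 2.0, 3.0, 4.0, 5.0]},
                   index=pd.Index(dates, name="date"))

pieces = split_dfs(toy)          # split_dfs as defined in the diff above
print([len(p) for p in pieces])  # [3, 2]: one contiguous run before the hole, one after
```

The interpolation loops then only fill a gap when both neighbouring runs are long enough to fit a seasonal model, which is what the `2*seasonal_periods+10` length check guards.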
EnergyLSTM/lstm_energy.ipynb CHANGED
@@ -132,13 +132,21 @@
   "extended_energy_data.set_index('date', inplace=True)\n",
   "\n",
   "eed_15m = extended_energy_data.resample('15T').mean()\n",
+ "eed_1h = extended_energy_data.resample('60T').mean()\n",
+ "\n",
   "eed_15m = eed_15m.reset_index(drop=False)\n",
+ "eed_1h = eed_1h.reset_index(drop=False)\n",
   "\n",
- "window_size = 12*4 # Half a day\n",
+ "window_size = 4*4 # 4 hours\n",
   "eed_15m_avg = eed_15m.copy()\n",
   "eed_15m_avg['hvac_N'] = eed_15m['hvac_N'].rolling(window=window_size).mean()\n",
   "eed_15m_avg['hvac_S'] = eed_15m['hvac_S'].rolling(window=window_size).mean()\n",
   "\n",
+ "window_size = 4 # 4 hours\n",
+ "eed_1h_avg = eed_1h.copy()\n",
+ "eed_1h_avg['hvac_N'] = eed_1h['hvac_N'].rolling(window=window_size).mean()\n",
+ "eed_1h_avg['hvac_S'] = eed_1h['hvac_S'].rolling(window=window_size).mean()\n",
+ "\n",
   "eed_15m.head()"
   ]
   },
@@ -210,6 +218,31 @@
   "testdataset = scaler.transform(testdataset)"
   ]
   },
+ {
+ "cell_type": "code",
+ "execution_count": 48,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def create_model(X_train, time_step, no_outputs):\n",
+ "    model = Sequential()\n",
+ "    model.add(LSTM(units=50, return_sequences=True, input_shape=(X_train.shape[1], X_train.shape[2])))\n",
+ "    model.add(LSTM(units=50, return_sequences=True))\n",
+ "    model.add(LSTM(units=time_step*no_outputs))\n",
+ "    model.add(Dense(units=time_step*no_outputs))\n",
+ "\n",
+ "    model.compile(optimizer='adam', loss='mean_squared_error')\n",
+ "\n",
+ "    return model"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Model 1 (continuous predictions)"
+ ]
+ },
   {
   "cell_type": "code",
   "execution_count": 35,
@@ -277,15 +310,7 @@
   "X_train, y_train = create_dataset(train, time_step)\n",
   "X_test, y_test = create_dataset(test, time_step)\n",
   "\n",
- "\n",
- "model = Sequential()\n",
- "model.add(LSTM(units=50, return_sequences=True, dropout= 0.2, input_shape=(X_train.shape[1], X_train.shape[2])))\n",
- "model.add(LSTM(units=50, dropout= 0.2, return_sequences=True))\n",
- "model.add(LSTM(units=time_step*no_outputs))\n",
- "model.add(Dense(units=time_step*no_outputs))\n",
- "\n",
- "model.compile(optimizer='adam', loss='mean_squared_error')\n",
- "\n",
+ "model = create_model(X_train, time_step, no_outputs)\n",
   "checkpoint_path = \"lstm_energy_01.keras\"\n",
   "checkpoint_callback = ModelCheckpoint(filepath=checkpoint_path, monitor='val_loss', verbose=1, save_best_only=True, mode='min')\n",
   "model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=5, batch_size=64, verbose=1, callbacks=[checkpoint_callback])"
@@ -293,16 +318,16 @@
   },
   {
   "cell_type": "code",
- "execution_count": 36,
+ "execution_count": 51,
   "metadata": {},
   "outputs": [
   {
   "name": "stdout",
   "output_type": "stream",
   "text": [
- "86/86 [==============================] - 0s 3ms/step - loss: 0.0186\n",
- "86/86 [==============================] - 1s 3ms/step\n",
- "Loss: 0.01863059028983116\n"
+ "4/4 [==============================] - 0s 4ms/step - loss: 0.0153\n",
+ "4/4 [==============================] - 1s 4ms/step\n",
+ "Loss: 0.01531214825809002\n"
   ]
   }
   ],
@@ -318,9 +343,21 @@
   },
   {
   "cell_type": "code",
- "execution_count": 37,
+ "execution_count": 52,
   "metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "ename": "IndexError",
+ "evalue": "index 106 is out of bounds for axis 0 with size 106",
+ "output_type": "error",
+ "traceback": [
+ "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
+ "\u001b[1;31mIndexError\u001b[0m Traceback (most recent call last)",
+ "Cell \u001b[1;32mIn[52], line 10\u001b[0m\n\u001b[0;32m 7\u001b[0m \u001b[38;5;66;03m# Loop over the value index\u001b[39;00m\n\u001b[0;32m 8\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m i, ax \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28menumerate\u001b[39m(axes\u001b[38;5;241m.\u001b[39mflat):\n\u001b[0;32m 9\u001b[0m \u001b[38;5;66;03m# Plot your data or perform any other operations\u001b[39;00m\n\u001b[1;32m---> 10\u001b[0m ax\u001b[38;5;241m.\u001b[39mplot(\u001b[43my_test\u001b[49m\u001b[43m[\u001b[49m\u001b[43mvar\u001b[49m\u001b[38;5;241;43m+\u001b[39;49m\u001b[43mi\u001b[49m\u001b[43m,\u001b[49m\u001b[38;5;241;43m0\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43mtime_step\u001b[49m\u001b[43m]\u001b[49m, label\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mOriginal Testing Data\u001b[39m\u001b[38;5;124m'\u001b[39m, color\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mblue\u001b[39m\u001b[38;5;124m'\u001b[39m)\n\u001b[0;32m 11\u001b[0m ax\u001b[38;5;241m.\u001b[39mplot(test_predict1[var\u001b[38;5;241m+\u001b[39mi,\u001b[38;5;241m0\u001b[39m:time_step], label\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mPredicted Testing Data\u001b[39m\u001b[38;5;124m'\u001b[39m, color\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mred\u001b[39m\u001b[38;5;124m'\u001b[39m,alpha\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m0.8\u001b[39m)\n\u001b[0;32m 12\u001b[0m \u001b[38;5;66;03m# ax.set_title(f'Plot {i+1}')\u001b[39;00m\n",
+ "\u001b[1;31mIndexError\u001b[0m: index 106 is out of bounds for axis 0 with size 106"
+ ]
+ }
+ ],
   "source": [
   "%matplotlib qt\n",
   "\n",
@@ -372,6 +409,153 @@
   "plt.legend()"
   ]
   },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Model 2 (Predicting once per day)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 50,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Epoch 1/20\n",
+ " 6/10 [=================>............] - ETA: 0s - loss: 0.0850 \n",
+ "Epoch 1: val_loss improved from inf to 0.07467, saving model to lstm_energy_01.keras\n",
+ "10/10 [==============================] - 7s 131ms/step - loss: 0.0791 - val_loss: 0.0747\n",
+ "Epoch 2/20\n",
+ " 6/10 [=================>............] - ETA: 0s - loss: 0.0487\n",
+ "Epoch 2: val_loss improved from 0.07467 to 0.03484, saving model to lstm_energy_01.keras\n",
+ "10/10 [==============================] - 0s 20ms/step - loss: 0.0419 - val_loss: 0.0348\n",
+ "Epoch 3/20\n",
+ " 6/10 [=================>............] - ETA: 0s - loss: 0.0262\n",
+ "Epoch 3: val_loss improved from 0.03484 to 0.02388, saving model to lstm_energy_01.keras\n",
+ "10/10 [==============================] - 0s 17ms/step - loss: 0.0241 - val_loss: 0.0239\n",
+ "Epoch 4/20\n",
+ " 6/10 [=================>............] - ETA: 0s - loss: 0.0180\n",
+ "Epoch 4: val_loss improved from 0.02388 to 0.02059, saving model to lstm_energy_01.keras\n",
+ "10/10 [==============================] - 0s 18ms/step - loss: 0.0174 - val_loss: 0.0206\n",
+ "Epoch 5/20\n",
+ " 6/10 [=================>............] - ETA: 0s - loss: 0.0134\n",
+ "Epoch 5: val_loss improved from 0.02059 to 0.01839, saving model to lstm_energy_01.keras\n",
+ "10/10 [==============================] - 0s 18ms/step - loss: 0.0130 - val_loss: 0.0184\n",
+ "Epoch 6/20\n",
+ " 8/10 [=======================>......] - ETA: 0s - loss: 0.0107\n",
+ "Epoch 6: val_loss did not improve from 0.01839\n",
+ "10/10 [==============================] - 0s 21ms/step - loss: 0.0106 - val_loss: 0.0255\n",
+ "Epoch 7/20\n",
+ " 6/10 [=================>............] - ETA: 0s - loss: 0.0090\n",
+ "Epoch 7: val_loss did not improve from 0.01839\n",
+ "10/10 [==============================] - 0s 14ms/step - loss: 0.0090 - val_loss: 0.0261\n",
+ "Epoch 8/20\n",
+ "10/10 [==============================] - ETA: 0s - loss: 0.0085\n",
+ "Epoch 8: val_loss did not improve from 0.01839\n",
+ "10/10 [==============================] - 0s 18ms/step - loss: 0.0085 - val_loss: 0.0197\n",
+ "Epoch 9/20\n",
+ " 9/10 [==========================>...] - ETA: 0s - loss: 0.0074\n",
+ "Epoch 9: val_loss improved from 0.01839 to 0.01687, saving model to lstm_energy_01.keras\n",
+ "10/10 [==============================] - 0s 22ms/step - loss: 0.0074 - val_loss: 0.0169\n",
+ "Epoch 10/20\n",
+ " 6/10 [=================>............] - ETA: 0s - loss: 0.0066\n",
+ "Epoch 10: val_loss did not improve from 0.01687\n",
+ "10/10 [==============================] - 0s 14ms/step - loss: 0.0068 - val_loss: 0.0171\n",
+ "Epoch 11/20\n",
+ " 6/10 [=================>............] - ETA: 0s - loss: 0.0063\n",
+ "Epoch 11: val_loss did not improve from 0.01687\n",
+ "10/10 [==============================] - 0s 14ms/step - loss: 0.0061 - val_loss: 0.0191\n",
+ "Epoch 12/20\n",
+ " 6/10 [=================>............] - ETA: 0s - loss: 0.0057\n",
+ "Epoch 12: val_loss improved from 0.01687 to 0.01678, saving model to lstm_energy_01.keras\n",
+ "10/10 [==============================] - 0s 18ms/step - loss: 0.0057 - val_loss: 0.0168\n",
+ "Epoch 13/20\n",
+ " 6/10 [=================>............] - ETA: 0s - loss: 0.0052\n",
+ "Epoch 13: val_loss did not improve from 0.01678\n",
+ "10/10 [==============================] - 0s 13ms/step - loss: 0.0058 - val_loss: 0.0206\n",
+ "Epoch 14/20\n",
+ " 6/10 [=================>............] - ETA: 0s - loss: 0.0058\n",
+ "Epoch 14: val_loss improved from 0.01678 to 0.01612, saving model to lstm_energy_01.keras\n",
+ "10/10 [==============================] - 0s 20ms/step - loss: 0.0062 - val_loss: 0.0161\n",
+ "Epoch 15/20\n",
+ " 6/10 [=================>............] - ETA: 0s - loss: 0.0061\n",
+ "Epoch 15: val_loss did not improve from 0.01612\n",
+ "10/10 [==============================] - 0s 14ms/step - loss: 0.0059 - val_loss: 0.0184\n",
+ "Epoch 16/20\n",
+ " 6/10 [=================>............] - ETA: 0s - loss: 0.0054\n",
+ "Epoch 16: val_loss improved from 0.01612 to 0.01561, saving model to lstm_energy_01.keras\n",
+ "10/10 [==============================] - 0s 17ms/step - loss: 0.0053 - val_loss: 0.0156\n",
+ "Epoch 17/20\n",
+ " 6/10 [=================>............] - ETA: 0s - loss: 0.0046\n",
+ "Epoch 17: val_loss did not improve from 0.01561\n",
+ "10/10 [==============================] - 0s 13ms/step - loss: 0.0048 - val_loss: 0.0166\n",
+ "Epoch 18/20\n",
+ " 6/10 [=================>............] - ETA: 0s - loss: 0.0054\n",
+ "Epoch 18: val_loss improved from 0.01561 to 0.01503, saving model to lstm_energy_01.keras\n",
+ "10/10 [==============================] - 0s 18ms/step - loss: 0.0052 - val_loss: 0.0150\n",
+ "Epoch 19/20\n",
+ " 6/10 [=================>............] - ETA: 0s - loss: 0.0050\n",
+ "Epoch 19: val_loss did not improve from 0.01503\n",
+ "10/10 [==============================] - 0s 13ms/step - loss: 0.0046 - val_loss: 0.0156\n",
+ "Epoch 20/20\n",
+ " 6/10 [=================>............] - ETA: 0s - loss: 0.0045\n",
+ "Epoch 20: val_loss did not improve from 0.01503\n",
+ "10/10 [==============================] - 0s 14ms/step - loss: 0.0045 - val_loss: 0.0153\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ "<keras.callbacks.History at 0x25e3a8cf640>"
+ ]
+ },
+ "execution_count": 50,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "train, test = traindataset, testdataset\n",
+ "steps_in_past = 7\n",
+ "time_step = 24\n",
+ "no_inputs = 5\n",
+ "no_outputs = 2\n",
+ "def create_dataset(dataset, time_step):\n",
+ "    x = [[] for _ in range(no_inputs)]\n",
+ "    Y = [[] for _ in range(no_outputs)]\n",
+ "    for i in range(steps_in_past, round(len(dataset)/24) - steps_in_past): # stop early so Y always has a full day of values left\n",
+ "        for j in range(no_inputs):\n",
+ "            x[j].append(dataset[(i-steps_in_past)*time_step:i*time_step, j])\n",
+ "        for j in range(no_outputs):\n",
+ "            Y[j].append(dataset[i*time_step:(i+1)*time_step, j])\n",
+ "    x = [np.array(feature_list) for feature_list in x]\n",
+ "    x = np.stack(x, axis=1)\n",
+ "    Y = [np.array(feature_list) for feature_list in Y]\n",
+ "    Y = np.stack(Y, axis=1)\n",
+ "    Y = np.reshape(Y, (Y.shape[0], time_step*no_outputs))\n",
+ "    return x, Y\n",
+ "\n",
+ "\n",
+ "X_train, y_train = create_dataset(train, time_step)\n",
+ "X_test, y_test = create_dataset(test, time_step)\n",
+ "\n",
+ "model = create_model(X_train, time_step, no_outputs)\n",
+ "checkpoint_path = \"lstm_energy_01.keras\"\n",
+ "checkpoint_callback = ModelCheckpoint(filepath=checkpoint_path, monitor='val_loss', verbose=1, save_best_only=True, mode='min')\n",
+ "model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=20, batch_size=64, verbose=1, callbacks=[checkpoint_callback])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
   {
   "cell_type": "code",
   "execution_count": null,
 
EnergyLSTM/lstm_energy_01.keras DELETED
Binary file (574 kB)
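With the checkpoint binary no longer tracked, the file is still produced locally by the `ModelCheckpoint` callback during training; a sketch of reloading the best epoch afterwards (assumes the training cell above has already run in the same working directory, so `X_test` exists):

```python
from keras.models import load_model

# save_best_only=True means the file holds the lowest-val_loss epoch,
# so reloading gives the best model rather than the last one
best_model = load_model("lstm_energy_01.keras")
test_predict = best_model.predict(X_test)  # X_test as built by create_dataset above
```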