levimohle committed
Commit 59b2cc9
1 parent: 3beb62e

Added function to fill data gaps

EnergyLSTM/EDA_lstm_energy.ipynb CHANGED
@@ -2,36 +2,24 @@
   "cells": [
   {
   "cell_type": "code",
- "execution_count": 1,
+ "execution_count": null,
   "metadata": {},
- "outputs": [
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "c:\\Users\\levim\\anaconda3\\envs\\experiments\\lib\\site-packages\\scipy\\__init__.py:146: UserWarning: A NumPy version >=1.16.5 and <1.23.0 is required for this version of SciPy (detected version 1.24.3\n",
- " warnings.warn(f\"A NumPy version >={np_minversion} and <{np_maxversion}\"\n"
- ]
- }
- ],
+ "outputs": [],
   "source": [
   "import pandas as pd \n",
   "from datetime import datetime \n",
- "from datetime import date\n",
+ "from datetime import timedelta\n",
   "import matplotlib.pyplot as plt\n",
   "# import seaborn as sns\n",
   "import numpy as np\n",
   "import pandas as pd\n",
- "from keras.models import Sequential\n",
- "from keras.layers import LSTM, Dense\n",
- "from sklearn.model_selection import train_test_split\n",
- "from sklearn.preprocessing import MinMaxScaler,StandardScaler\n",
- "from keras.callbacks import ModelCheckpoint\n",
+ "from statsmodels.tsa.holtwinters import ExponentialSmoothing\n",
   "\n",
   "dataPATH = r\"C:\\Users\\levim\\OneDrive\\Documents\\MastersAI_ES\\TeamProject-5ARIP10\\smart-buildings\\Data\"\n",
   "\n",
   "### Load ALL data ###\n",
- "all_data = pd.read_csv(dataPATH + r\"\\long_merge.csv\")"
+ "# all_data = pd.read_csv(dataPATH + r\"\\long_merge.csv\")\n",
+ "all_data = pd.read_csv(dataPATH + r\"\\extended_energy_data.csv\")"
   ]
   },
   {
@@ -43,22 +31,9 @@
   },
   {
   "cell_type": "code",
- "execution_count": 20,
+ "execution_count": null,
   "metadata": {},
- "outputs": [
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "C:\\Users\\levim\\AppData\\Local\\Temp\\ipykernel_27084\\3547628995.py:5: SettingWithCopyWarning: \n",
- "A value is trying to be set on a copy of a slice from a DataFrame.\n",
- "Try using .loc[row_indexer,col_indexer] = value instead\n",
- "\n",
- "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
- " extended_energy_data['date'] = pd.to_datetime(extended_energy_data['date'])\n"
- ]
- }
- ],
+ "outputs": [],
   "source": [
   "# Prepare energy data set with extended features\n",
   "feature_list = ['date', 'hvac_N', 'hvac_S', 'air_temp_set_1', 'solar_radiation_set_1']\n",
@@ -75,7 +50,7 @@
   },
   {
   "cell_type": "code",
- "execution_count": 27,
+ "execution_count": null,
   "metadata": {
   "vscode": {
   "languageId": "ruby"
@@ -84,43 +59,255 @@
   "outputs": [],
   "source": [
   "# Smooth the hvac columns with a rolling moving-average window\n",
- "window_size = 12*4 # Half a day\n",
+ "window_size = 4*4 # 4 hours\n",
   "eed_15m_avg = eed_15m.copy()\n",
   "eed_15m_avg['hvac_N'] = eed_15m['hvac_N'].rolling(window=window_size).mean()\n",
- "eed_15m_avg['hvac_S'] = eed_15m['hvac_S'].rolling(window=window_size).mean()"
+ "eed_15m_avg['hvac_S'] = eed_15m['hvac_S'].rolling(window=window_size).mean()\n",
+ "\n",
+ "window_size = 4 # 4 hours\n",
+ "eed_1h_avg = eed_1h.copy()\n",
+ "eed_1h_avg['hvac_N'] = eed_1h['hvac_N'].rolling(window=window_size).mean()\n",
+ "eed_1h_avg['hvac_S'] = eed_1h['hvac_S'].rolling(window=window_size).mean()"
   ]
   },
   {
   "cell_type": "code",
- "execution_count": 26,
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%matplotlib qt\n",
+ "\n",
+ "start_date = '2018-06-02'\n",
+ "end_date = '2018-06-08'\n",
+ "\n",
+ "plt.plot(eed_15m['hvac_N'].loc[start_date:end_date])\n",
+ "plt.plot(eed_15m_avg['hvac_N'].loc[start_date:end_date])\n",
+ "plt.plot(eed_1h_avg['hvac_N'].loc[start_date:end_date])\n",
+ "plt.xticks(rotation=45)\n",
+ "plt.show()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%matplotlib qt\n",
+ "\n",
+ "plt.figure(figsize=(20,10))\n",
+ "plt.plot(eed_1h['hvac_S'])\n",
+ "plt.show()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "eed_1h[eed_1h['hvac_S'].isna()]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Filling data gaps"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def fillgap(firstTS, secondTS, seasonal_periods):\n",
+ "    \n",
+ "    #PREPARATION\n",
+ "    one = timedelta(hours=1)\n",
+ "    secondTSr = secondTS[::-1].copy()\n",
+ "    firstTSr = firstTS[::-1].copy()\n",
+ "    indexr = pd.date_range(start=firstTS.index[0], end=secondTS.index[-1], freq='h')\n",
+ "    firstTSr.index = indexr[-len(firstTSr):]\n",
+ "    secondTSr.index = indexr[:len(secondTSr)]\n",
+ "    \n",
+ "    #FORWARD\n",
+ "    es = ExponentialSmoothing(firstTS, seasonal_periods=seasonal_periods, seasonal='add').fit()\n",
+ "    forwardPrediction = es.predict(start=firstTS.index[-1]+one, end=secondTS.index[0]-one)\n",
+ "    \n",
+ "    #BACKWARD\n",
+ "    es = ExponentialSmoothing(secondTSr, seasonal_periods=seasonal_periods, seasonal='add').fit()\n",
+ "    backwardPrediction = es.predict(start=secondTSr.index[-1]+one, end=firstTSr.index[0]-one)\n",
+ "    \n",
+ "    #INTERPOLATION\n",
+ "    l = len(forwardPrediction)\n",
+ "    interpolation = pd.Series([(backwardPrediction[i] * i + forwardPrediction[i] * (l - i)) / l for i in range(l)], index=forwardPrediction.index.copy())\n",
+ "    \n",
+ "    return interpolation"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Function to split the data into multiple DataFrames based on the gaps\n",
+ "def split_dfs(data):\n",
+ "\n",
+ "    # Prepare the DataFrame\n",
+ "    df = data.copy()\n",
+ "    df = df.reset_index()\n",
+ "    df = df.dropna()\n",
+ "    \n",
+ "    # Set the maximum allowable gap (e.g., 1 hour)\n",
+ "    max_gap = pd.Timedelta(hours=1)\n",
+ "\n",
+ "    # Calculate the differences between consecutive timestamps\n",
+ "    time_diff = df['date'].diff()\n",
+ "\n",
+ "    # Identify gaps larger than the maximum allowable gap\n",
+ "    gaps = time_diff > max_gap\n",
+ "\n",
+ "    # Create a new column to identify different groups\n",
+ "    df['group'] = gaps.cumsum()\n",
+ "\n",
+ "    df.set_index('date', inplace=True)\n",
+ "\n",
+ "    # Split the DataFrame into a list of DataFrames based on the groups\n",
+ "    dfs = [group for _, group in df.groupby('group')]\n",
+ "\n",
+ "    return dfs"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "seasonal_periods = 24\n",
+ "dfs = split_dfs(eed_1h[['hvac_N']])\n",
+ "\n",
+ "# Interpolate the gaps between the DataFrames\n",
+ "ip_df = pd.DataFrame()\n",
+ "for ii in range(len(dfs)-1):\n",
+ "    if (len(dfs[ii]) > 2*seasonal_periods+10) and (len(dfs[ii+1]) > 2*seasonal_periods+10):\n",
+ "        interpolation = fillgap(dfs[ii]['hvac_N'], dfs[ii+1]['hvac_N'], seasonal_periods)\n",
+ "        ip_df = pd.concat([ip_df, interpolation])\n",
+ "    else:\n",
+ "        continue"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
   "metadata": {},
   "outputs": [
   {
- "data": {
- "text/plain": [
- "(array([17591., 17652., 17713., 17775., 17836., 17897.]),\n",
- " [Text(17591.0, 0, '2018-03'),\n",
- " Text(17652.0, 0, '2018-05'),\n",
- " Text(17713.0, 0, '2018-07'),\n",
- " Text(17775.0, 0, '2018-09'),\n",
- " Text(17836.0, 0, '2018-11'),\n",
- " Text(17897.0, 0, '2019-01')])"
- ]
- },
- "execution_count": 26,
- "metadata": {},
- "output_type": "execute_result"
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "c:\\Users\\levim\\anaconda3\\envs\\experiments\\lib\\site-packages\\statsmodels\\tsa\\base\\tsa_model.py:473: ValueWarning: No frequency information was provided, so inferred frequency H will be used.\n",
+ " self._init_dates(dates, freq)\n",
+ "c:\\Users\\levim\\anaconda3\\envs\\experiments\\lib\\site-packages\\statsmodels\\tsa\\base\\tsa_model.py:473: ValueWarning: No frequency information was provided, so inferred frequency H will be used.\n",
+ " self._init_dates(dates, freq)\n",
+ "c:\\Users\\levim\\anaconda3\\envs\\experiments\\lib\\site-packages\\statsmodels\\tsa\\base\\tsa_model.py:473: ValueWarning: No frequency information was provided, so inferred frequency H will be used.\n",
+ " self._init_dates(dates, freq)\n",
+ "c:\\Users\\levim\\anaconda3\\envs\\experiments\\lib\\site-packages\\statsmodels\\tsa\\base\\tsa_model.py:473: ValueWarning: No frequency information was provided, so inferred frequency H will be used.\n",
+ " self._init_dates(dates, freq)\n",
+ "c:\\Users\\levim\\anaconda3\\envs\\experiments\\lib\\site-packages\\statsmodels\\tsa\\base\\tsa_model.py:473: ValueWarning: No frequency information was provided, so inferred frequency H will be used.\n",
+ " self._init_dates(dates, freq)\n",
+ "c:\\Users\\levim\\anaconda3\\envs\\experiments\\lib\\site-packages\\statsmodels\\tsa\\base\\tsa_model.py:473: ValueWarning: No frequency information was provided, so inferred frequency H will be used.\n",
+ " self._init_dates(dates, freq)\n",
+ "c:\\Users\\levim\\anaconda3\\envs\\experiments\\lib\\site-packages\\statsmodels\\tsa\\base\\tsa_model.py:473: ValueWarning: No frequency information was provided, so inferred frequency H will be used.\n",
+ " self._init_dates(dates, freq)\n",
+ "c:\\Users\\levim\\anaconda3\\envs\\experiments\\lib\\site-packages\\statsmodels\\tsa\\base\\tsa_model.py:473: ValueWarning: No frequency information was provided, so inferred frequency H will be used.\n",
+ " self._init_dates(dates, freq)\n",
+ "c:\\Users\\levim\\anaconda3\\envs\\experiments\\lib\\site-packages\\statsmodels\\tsa\\base\\tsa_model.py:473: ValueWarning: No frequency information was provided, so inferred frequency H will be used.\n",
+ " self._init_dates(dates, freq)\n",
+ "c:\\Users\\levim\\anaconda3\\envs\\experiments\\lib\\site-packages\\statsmodels\\tsa\\base\\tsa_model.py:473: ValueWarning: No frequency information was provided, so inferred frequency H will be used.\n",
+ " self._init_dates(dates, freq)\n",
+ "c:\\Users\\levim\\anaconda3\\envs\\experiments\\lib\\site-packages\\statsmodels\\tsa\\base\\tsa_model.py:473: ValueWarning: No frequency information was provided, so inferred frequency H will be used.\n",
+ " self._init_dates(dates, freq)\n",
+ "c:\\Users\\levim\\anaconda3\\envs\\experiments\\lib\\site-packages\\statsmodels\\tsa\\base\\tsa_model.py:473: ValueWarning: No frequency information was provided, so inferred frequency H will be used.\n",
+ " self._init_dates(dates, freq)\n",
+ "c:\\Users\\levim\\anaconda3\\envs\\experiments\\lib\\site-packages\\statsmodels\\tsa\\base\\tsa_model.py:473: ValueWarning: No frequency information was provided, so inferred frequency H will be used.\n",
+ " self._init_dates(dates, freq)\n",
+ "c:\\Users\\levim\\anaconda3\\envs\\experiments\\lib\\site-packages\\statsmodels\\tsa\\base\\tsa_model.py:473: ValueWarning: No frequency information was provided, so inferred frequency H will be used.\n",
+ " self._init_dates(dates, freq)\n",
+ "c:\\Users\\levim\\anaconda3\\envs\\experiments\\lib\\site-packages\\statsmodels\\tsa\\base\\tsa_model.py:473: ValueWarning: No frequency information was provided, so inferred frequency H will be used.\n",
+ " self._init_dates(dates, freq)\n",
+ "c:\\Users\\levim\\anaconda3\\envs\\experiments\\lib\\site-packages\\statsmodels\\tsa\\base\\tsa_model.py:473: ValueWarning: No frequency information was provided, so inferred frequency H will be used.\n",
+ " self._init_dates(dates, freq)\n",
+ "c:\\Users\\levim\\anaconda3\\envs\\experiments\\lib\\site-packages\\statsmodels\\tsa\\base\\tsa_model.py:473: ValueWarning: No frequency information was provided, so inferred frequency H will be used.\n",
+ " self._init_dates(dates, freq)\n",
+ "c:\\Users\\levim\\anaconda3\\envs\\experiments\\lib\\site-packages\\statsmodels\\tsa\\base\\tsa_model.py:473: ValueWarning: No frequency information was provided, so inferred frequency H will be used.\n",
+ " self._init_dates(dates, freq)\n",
+ "c:\\Users\\levim\\anaconda3\\envs\\experiments\\lib\\site-packages\\statsmodels\\tsa\\base\\tsa_model.py:473: ValueWarning: No frequency information was provided, so inferred frequency H will be used.\n",
+ " self._init_dates(dates, freq)\n",
+ "c:\\Users\\levim\\anaconda3\\envs\\experiments\\lib\\site-packages\\statsmodels\\tsa\\base\\tsa_model.py:473: ValueWarning: No frequency information was provided, so inferred frequency H will be used.\n",
+ " self._init_dates(dates, freq)\n",
+ "c:\\Users\\levim\\anaconda3\\envs\\experiments\\lib\\site-packages\\statsmodels\\tsa\\holtwinters\\model.py:917: ConvergenceWarning: Optimization failed to converge. Check mle_retvals.\n",
+ " warnings.warn(\n",
+ "c:\\Users\\levim\\anaconda3\\envs\\experiments\\lib\\site-packages\\statsmodels\\tsa\\base\\tsa_model.py:473: ValueWarning: No frequency information was provided, so inferred frequency H will be used.\n",
+ " self._init_dates(dates, freq)\n",
+ "c:\\Users\\levim\\anaconda3\\envs\\experiments\\lib\\site-packages\\statsmodels\\tsa\\holtwinters\\model.py:917: ConvergenceWarning: Optimization failed to converge. Check mle_retvals.\n",
+ " warnings.warn(\n",
+ "c:\\Users\\levim\\anaconda3\\envs\\experiments\\lib\\site-packages\\statsmodels\\tsa\\base\\tsa_model.py:473: ValueWarning: No frequency information was provided, so inferred frequency H will be used.\n",
+ " self._init_dates(dates, freq)\n",
+ "c:\\Users\\levim\\anaconda3\\envs\\experiments\\lib\\site-packages\\statsmodels\\tsa\\holtwinters\\model.py:917: ConvergenceWarning: Optimization failed to converge. Check mle_retvals.\n",
+ " warnings.warn(\n",
+ "c:\\Users\\levim\\anaconda3\\envs\\experiments\\lib\\site-packages\\statsmodels\\tsa\\base\\tsa_model.py:473: ValueWarning: No frequency information was provided, so inferred frequency H will be used.\n",
+ " self._init_dates(dates, freq)\n",
+ "c:\\Users\\levim\\anaconda3\\envs\\experiments\\lib\\site-packages\\statsmodels\\tsa\\holtwinters\\model.py:917: ConvergenceWarning: Optimization failed to converge. Check mle_retvals.\n",
+ " warnings.warn(\n"
+ ]
   }
   ],
   "source": [
- "%matplotlib qt\n",
+ "seasonal_periods = 24\n",
+ "dfs = split_dfs(eed_1h[['hvac_N']])\n",
   "\n",
- "start_date = '2018-02-02'\n",
- "end_date = '2019-02-03'\n",
+ "# Interpolate the gaps between the DataFrames\n",
+ "ip_df = pd.DataFrame()\n",
+ "for ii in range(len(dfs)-1):\n",
+ "    seasonal_periods = max(min([len(dfs[ii]), len(dfs[ii+1])]) // 2 - 10, 2)\n",
+ "    \n",
+ "    if seasonal_periods > 2*24*7 + 10: # Using more than 1 week of seasonal patterns is not necessary\n",
+ "        seasonal_periods = 24*7\n",
+ "        interpolation = fillgap(dfs[ii]['hvac_N'], dfs[ii+1]['hvac_N'], seasonal_periods)\n",
+ "    else:\n",
+ "        interpolation = fillgap(dfs[ii]['hvac_N'], dfs[ii+1]['hvac_N'], seasonal_periods)\n",
   "\n",
- "plt.plot(eed_15m['hvac_N'].loc[start_date:end_date])\n",
- "plt.plot(eed_15m['moving_average'].loc[start_date:end_date])\n",
- "plt.xticks(rotation=45)"
+ "    ip_df = pd.concat([ip_df, interpolation])\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%matplotlib qt\n",
+ "plt.plot(eed_1h['hvac_N'])\n",
+ "plt.plot(ip_df)\n",
+ "\n",
+ "plt.show()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "seasonal_periods = 2\n",
+ "for ii in range(len(dfs)-1):\n",
+ "    interpolation = fillgap(dfs[ii]['hvac_N'], dfs[ii+1]['hvac_N'], seasonal_periods)\n",
+ "    ip_df = pd.concat([ip_df, interpolation])"
   ]
   },
   {
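The centerpiece of the commit is `fillgap`, which bridges a hole in an hourly series by fitting an additive Holt-Winters model on each side of the gap, forecasting forward from the left segment and backward from the reversed right segment, and blending the two forecasts linearly. A minimal self-contained sketch of the same idea on synthetic data (the series, dates, and variable names below are illustrative, not from the repository):

```python
import numpy as np
import pandas as pd
from statsmodels.tsa.holtwinters import ExponentialSmoothing

# Synthetic hourly series with a daily cycle; two days are treated as the gap
idx = pd.date_range("2018-06-01", periods=24 * 14, freq="h")
rng = np.random.default_rng(0)
series = pd.Series(10 + 5 * np.sin(2 * np.pi * idx.hour / 24)
                   + rng.normal(0, 0.5, len(idx)), index=idx)
gap = idx[(idx >= "2018-06-06") & (idx < "2018-06-08")]
before = series[: gap[0] - pd.Timedelta(hours=1)]
after = series[gap[-1] + pd.Timedelta(hours=1) :]

# Forward: forecast across the gap from the left segment
fwd = (ExponentialSmoothing(before, seasonal_periods=24, seasonal="add")
       .fit().predict(start=gap[0], end=gap[-1]))

# Backward: reverse the right segment, give it an increasing hourly index,
# forecast in reversed time, then flip the forecast back into real time
rev = pd.Series(after.values[::-1],
                index=pd.date_range(start=gap[0], periods=len(after), freq="h"))
bwd = (ExponentialSmoothing(rev, seasonal_periods=24, seasonal="add")
       .fit().forecast(len(gap)).values[::-1])

# Linear blend, mirroring fillgap's (backward*i + forward*(l-i))/l weighting:
# trust the forward forecast near the left edge, the backward one near the right
w = np.arange(len(gap)) / len(gap)
filled = pd.Series((1 - w) * fwd.values + w * bwd, index=gap)
```

The blend keeps the fill continuous at both edges of the gap, which a single forward forecast cannot guarantee at the right edge.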
 
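`split_dfs` supplies the segments that `fillgap` stitches together: it drops NaNs, then starts a new group wherever consecutive timestamps are more than an hour apart. A quick behavioural check on a toy frame (values are made up; only the `date` index name matters to the function, which is assumed defined as in the diff above):

```python
import pandas as pd

# Five hourly rows with a three-hour hole after the third one
dates = pd.to_datetime(["2018-06-01 00:00", "2018-06-01 01:00", "2018-06-01 02:00",
                        "2018-06-01 06:00", "2018-06-01 07:00"])
toy = pd.DataFrame({"hvac_N": [1.0, 2.0, 3.0, 4.0, 5.0]},
                   index=pd.Index(dates, name="date"))

pieces = split_dfs(toy)          # split_dfs as defined in the diff above
print([len(p) for p in pieces])  # [3, 2]: one contiguous run before the hole, one after
```

The interpolation loops then only fill a gap when both neighbouring runs are long enough to fit a seasonal model, which is what the `2*seasonal_periods+10` length check guards.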
EnergyLSTM/lstm_energy.ipynb CHANGED
@@ -132,13 +132,21 @@
   "extended_energy_data.set_index('date', inplace=True)\n",
   "\n",
   "eed_15m = extended_energy_data.resample('15T').mean()\n",
+ "eed_1h = extended_energy_data.resample('60T').mean()\n",
+ "\n",
   "eed_15m = eed_15m.reset_index(drop=False)\n",
+ "eed_1h = eed_1h.reset_index(drop=False)\n",
   "\n",
- "window_size = 12*4 # Half a day\n",
+ "window_size = 4*4 # 4 hours\n",
   "eed_15m_avg = eed_15m.copy()\n",
   "eed_15m_avg['hvac_N'] = eed_15m['hvac_N'].rolling(window=window_size).mean()\n",
   "eed_15m_avg['hvac_S'] = eed_15m['hvac_S'].rolling(window=window_size).mean()\n",
   "\n",
+ "window_size = 4 # 4 hours\n",
+ "eed_1h_avg = eed_1h.copy()\n",
+ "eed_1h_avg['hvac_N'] = eed_1h['hvac_N'].rolling(window=window_size).mean()\n",
+ "eed_1h_avg['hvac_S'] = eed_1h['hvac_S'].rolling(window=window_size).mean()\n",
+ "\n",
   "eed_15m.head()"
   ]
   },
@@ -210,6 +218,31 @@
   "testdataset = scaler.transform(testdataset)"
   ]
   },
+ {
+ "cell_type": "code",
+ "execution_count": 48,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def create_model(X_train, time_step, no_outputs):\n",
+ "    model = Sequential()\n",
+ "    model.add(LSTM(units=50, return_sequences=True, input_shape=(X_train.shape[1], X_train.shape[2])))\n",
+ "    model.add(LSTM(units=50, return_sequences=True))\n",
+ "    model.add(LSTM(units=time_step*no_outputs))\n",
+ "    model.add(Dense(units=time_step*no_outputs))\n",
+ "\n",
+ "    model.compile(optimizer='adam', loss='mean_squared_error')\n",
+ "\n",
+ "    return model"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Model 1 (continuous predictions)"
+ ]
+ },
   {
   "cell_type": "code",
   "execution_count": 35,
@@ -277,15 +310,7 @@
   "X_train, y_train = create_dataset(train, time_step)\n",
   "X_test, y_test = create_dataset(test, time_step)\n",
   "\n",
- "\n",
- "model = Sequential()\n",
- "model.add(LSTM(units=50, return_sequences=True, dropout= 0.2, input_shape=(X_train.shape[1], X_train.shape[2])))\n",
- "model.add(LSTM(units=50, dropout= 0.2, return_sequences=True))\n",
- "model.add(LSTM(units=time_step*no_outputs))\n",
- "model.add(Dense(units=time_step*no_outputs))\n",
- "\n",
- "model.compile(optimizer='adam', loss='mean_squared_error')\n",
- "\n",
+ "model = create_model(X_train, time_step, no_outputs)\n",
   "checkpoint_path = \"lstm_energy_01.keras\"\n",
   "checkpoint_callback = ModelCheckpoint(filepath=checkpoint_path, monitor='val_loss', verbose=1, save_best_only=True, mode='min')\n",
   "model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=5, batch_size=64, verbose=1, callbacks=[checkpoint_callback])"
@@ -293,16 +318,16 @@
   },
   {
   "cell_type": "code",
- "execution_count": 36,
+ "execution_count": 51,
   "metadata": {},
   "outputs": [
   {
   "name": "stdout",
   "output_type": "stream",
   "text": [
- "86/86 [==============================] - 0s 3ms/step - loss: 0.0186\n",
- "86/86 [==============================] - 1s 3ms/step\n",
- "Loss: 0.01863059028983116\n"
+ "4/4 [==============================] - 0s 4ms/step - loss: 0.0153\n",
+ "4/4 [==============================] - 1s 4ms/step\n",
+ "Loss: 0.01531214825809002\n"
   ]
   }
   ],
@@ -318,9 +343,21 @@
   },
   {
   "cell_type": "code",
- "execution_count": 37,
+ "execution_count": 52,
   "metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "ename": "IndexError",
+ "evalue": "index 106 is out of bounds for axis 0 with size 106",
+ "output_type": "error",
+ "traceback": [
+ "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
+ "\u001b[1;31mIndexError\u001b[0m Traceback (most recent call last)",
+ "Cell \u001b[1;32mIn[52], line 10\u001b[0m\n\u001b[0;32m 7\u001b[0m \u001b[38;5;66;03m# Loop over the value index\u001b[39;00m\n\u001b[0;32m 8\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m i, ax \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28menumerate\u001b[39m(axes\u001b[38;5;241m.\u001b[39mflat):\n\u001b[0;32m 9\u001b[0m \u001b[38;5;66;03m# Plot your data or perform any other operations\u001b[39;00m\n\u001b[1;32m---> 10\u001b[0m ax\u001b[38;5;241m.\u001b[39mplot(\u001b[43my_test\u001b[49m\u001b[43m[\u001b[49m\u001b[43mvar\u001b[49m\u001b[38;5;241;43m+\u001b[39;49m\u001b[43mi\u001b[49m\u001b[43m,\u001b[49m\u001b[38;5;241;43m0\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43mtime_step\u001b[49m\u001b[43m]\u001b[49m, label\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mOriginal Testing Data\u001b[39m\u001b[38;5;124m'\u001b[39m, color\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mblue\u001b[39m\u001b[38;5;124m'\u001b[39m)\n\u001b[0;32m 11\u001b[0m ax\u001b[38;5;241m.\u001b[39mplot(test_predict1[var\u001b[38;5;241m+\u001b[39mi,\u001b[38;5;241m0\u001b[39m:time_step], label\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mPredicted Testing Data\u001b[39m\u001b[38;5;124m'\u001b[39m, color\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mred\u001b[39m\u001b[38;5;124m'\u001b[39m,alpha\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m0.8\u001b[39m)\n\u001b[0;32m 12\u001b[0m \u001b[38;5;66;03m# ax.set_title(f'Plot {i+1}')\u001b[39;00m\n",
+ "\u001b[1;31mIndexError\u001b[0m: index 106 is out of bounds for axis 0 with size 106"
+ ]
+ }
+ ],
   "source": [
   "%matplotlib qt\n",
   "\n",
@@ -372,6 +409,153 @@
   "plt.legend()"
   ]
   },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Model 2 (Predicting once per day)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 50,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Epoch 1/20\n",
+ " 6/10 [=================>............] - ETA: 0s - loss: 0.0850 \n",
+ "Epoch 1: val_loss improved from inf to 0.07467, saving model to lstm_energy_01.keras\n",
+ "10/10 [==============================] - 7s 131ms/step - loss: 0.0791 - val_loss: 0.0747\n",
+ "Epoch 2/20\n",
+ " 6/10 [=================>............] - ETA: 0s - loss: 0.0487\n",
+ "Epoch 2: val_loss improved from 0.07467 to 0.03484, saving model to lstm_energy_01.keras\n",
+ "10/10 [==============================] - 0s 20ms/step - loss: 0.0419 - val_loss: 0.0348\n",
+ "Epoch 3/20\n",
+ " 6/10 [=================>............] - ETA: 0s - loss: 0.0262\n",
+ "Epoch 3: val_loss improved from 0.03484 to 0.02388, saving model to lstm_energy_01.keras\n",
+ "10/10 [==============================] - 0s 17ms/step - loss: 0.0241 - val_loss: 0.0239\n",
+ "Epoch 4/20\n",
+ " 6/10 [=================>............] - ETA: 0s - loss: 0.0180\n",
+ "Epoch 4: val_loss improved from 0.02388 to 0.02059, saving model to lstm_energy_01.keras\n",
+ "10/10 [==============================] - 0s 18ms/step - loss: 0.0174 - val_loss: 0.0206\n",
+ "Epoch 5/20\n",
+ " 6/10 [=================>............] - ETA: 0s - loss: 0.0134\n",
+ "Epoch 5: val_loss improved from 0.02059 to 0.01839, saving model to lstm_energy_01.keras\n",
+ "10/10 [==============================] - 0s 18ms/step - loss: 0.0130 - val_loss: 0.0184\n",
+ "Epoch 6/20\n",
+ " 8/10 [=======================>......] - ETA: 0s - loss: 0.0107\n",
+ "Epoch 6: val_loss did not improve from 0.01839\n",
+ "10/10 [==============================] - 0s 21ms/step - loss: 0.0106 - val_loss: 0.0255\n",
+ "Epoch 7/20\n",
+ " 6/10 [=================>............] - ETA: 0s - loss: 0.0090\n",
+ "Epoch 7: val_loss did not improve from 0.01839\n",
+ "10/10 [==============================] - 0s 14ms/step - loss: 0.0090 - val_loss: 0.0261\n",
+ "Epoch 8/20\n",
+ "10/10 [==============================] - ETA: 0s - loss: 0.0085\n",
+ "Epoch 8: val_loss did not improve from 0.01839\n",
+ "10/10 [==============================] - 0s 18ms/step - loss: 0.0085 - val_loss: 0.0197\n",
+ "Epoch 9/20\n",
+ " 9/10 [==========================>...] - ETA: 0s - loss: 0.0074\n",
+ "Epoch 9: val_loss improved from 0.01839 to 0.01687, saving model to lstm_energy_01.keras\n",
+ "10/10 [==============================] - 0s 22ms/step - loss: 0.0074 - val_loss: 0.0169\n",
+ "Epoch 10/20\n",
+ " 6/10 [=================>............] - ETA: 0s - loss: 0.0066\n",
+ "Epoch 10: val_loss did not improve from 0.01687\n",
+ "10/10 [==============================] - 0s 14ms/step - loss: 0.0068 - val_loss: 0.0171\n",
+ "Epoch 11/20\n",
+ " 6/10 [=================>............] - ETA: 0s - loss: 0.0063\n",
+ "Epoch 11: val_loss did not improve from 0.01687\n",
+ "10/10 [==============================] - 0s 14ms/step - loss: 0.0061 - val_loss: 0.0191\n",
+ "Epoch 12/20\n",
+ " 6/10 [=================>............] - ETA: 0s - loss: 0.0057\n",
+ "Epoch 12: val_loss improved from 0.01687 to 0.01678, saving model to lstm_energy_01.keras\n",
+ "10/10 [==============================] - 0s 18ms/step - loss: 0.0057 - val_loss: 0.0168\n",
+ "Epoch 13/20\n",
+ " 6/10 [=================>............] - ETA: 0s - loss: 0.0052\n",
+ "Epoch 13: val_loss did not improve from 0.01678\n",
+ "10/10 [==============================] - 0s 13ms/step - loss: 0.0058 - val_loss: 0.0206\n",
+ "Epoch 14/20\n",
+ " 6/10 [=================>............] - ETA: 0s - loss: 0.0058\n",
+ "Epoch 14: val_loss improved from 0.01678 to 0.01612, saving model to lstm_energy_01.keras\n",
+ "10/10 [==============================] - 0s 20ms/step - loss: 0.0062 - val_loss: 0.0161\n",
+ "Epoch 15/20\n",
+ " 6/10 [=================>............] - ETA: 0s - loss: 0.0061\n",
+ "Epoch 15: val_loss did not improve from 0.01612\n",
+ "10/10 [==============================] - 0s 14ms/step - loss: 0.0059 - val_loss: 0.0184\n",
+ "Epoch 16/20\n",
+ " 6/10 [=================>............] - ETA: 0s - loss: 0.0054\n",
+ "Epoch 16: val_loss improved from 0.01612 to 0.01561, saving model to lstm_energy_01.keras\n",
+ "10/10 [==============================] - 0s 17ms/step - loss: 0.0053 - val_loss: 0.0156\n",
+ "Epoch 17/20\n",
+ " 6/10 [=================>............] - ETA: 0s - loss: 0.0046\n",
+ "Epoch 17: val_loss did not improve from 0.01561\n",
+ "10/10 [==============================] - 0s 13ms/step - loss: 0.0048 - val_loss: 0.0166\n",
+ "Epoch 18/20\n",
+ " 6/10 [=================>............] - ETA: 0s - loss: 0.0054\n",
+ "Epoch 18: val_loss improved from 0.01561 to 0.01503, saving model to lstm_energy_01.keras\n",
+ "10/10 [==============================] - 0s 18ms/step - loss: 0.0052 - val_loss: 0.0150\n",
+ "Epoch 19/20\n",
+ " 6/10 [=================>............] - ETA: 0s - loss: 0.0050\n",
+ "Epoch 19: val_loss did not improve from 0.01503\n",
+ "10/10 [==============================] - 0s 13ms/step - loss: 0.0046 - val_loss: 0.0156\n",
+ "Epoch 20/20\n",
+ " 6/10 [=================>............] - ETA: 0s - loss: 0.0045\n",
+ "Epoch 20: val_loss did not improve from 0.01503\n",
+ "10/10 [==============================] - 0s 14ms/step - loss: 0.0045 - val_loss: 0.0153\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ "<keras.callbacks.History at 0x25e3a8cf640>"
+ ]
+ },
+ "execution_count": 50,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "train, test = traindataset, testdataset\n",
+ "steps_in_past = 7\n",
+ "time_step = 24\n",
+ "no_inputs = 5\n",
+ "no_outputs = 2\n",
+ "def create_dataset(dataset, time_step):\n",
+ "    x = [[] for _ in range(no_inputs)]\n",
+ "    Y = [[] for _ in range(no_outputs)]\n",
+ "    for i in range(steps_in_past, round(len(dataset)/24) - steps_in_past): # stop early so Y always has a full day of values left\n",
+ "        for j in range(no_inputs):\n",
+ "            x[j].append(dataset[(i-steps_in_past)*time_step:i*time_step, j])\n",
+ "        for j in range(no_outputs):\n",
+ "            Y[j].append(dataset[i*time_step:(i+1)*time_step, j])\n",
+ "    x = [np.array(feature_list) for feature_list in x]\n",
+ "    x = np.stack(x, axis=1)\n",
+ "    Y = [np.array(feature_list) for feature_list in Y]\n",
+ "    Y = np.stack(Y, axis=1)\n",
+ "    Y = np.reshape(Y, (Y.shape[0], time_step*no_outputs))\n",
+ "    return x, Y\n",
+ "\n",
+ "\n",
+ "X_train, y_train = create_dataset(train, time_step)\n",
+ "X_test, y_test = create_dataset(test, time_step)\n",
+ "\n",
+ "model = create_model(X_train, time_step, no_outputs)\n",
+ "checkpoint_path = \"lstm_energy_01.keras\"\n",
+ "checkpoint_callback = ModelCheckpoint(filepath=checkpoint_path, monitor='val_loss', verbose=1, save_best_only=True, mode='min')\n",
+ "model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=20, batch_size=64, verbose=1, callbacks=[checkpoint_callback])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
   {
   "cell_type": "code",
   "execution_count": null,
 
EnergyLSTM/lstm_energy_01.keras DELETED
Binary file (574 kB)
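With the checkpoint binary no longer tracked, the file is still produced locally by the `ModelCheckpoint` callback during training; a sketch of reloading the best epoch afterwards (assumes the training cell above has already run in the same working directory, so `X_test` exists):

```python
from keras.models import load_model

# save_best_only=True means the file holds the lowest-val_loss epoch,
# so reloading gives the best model rather than the last one
best_model = load_model("lstm_energy_01.keras")
test_predict = best_model.predict(X_test)  # X_test as built by create_dataset above
```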