levimohle commited on
Commit
30c3c9d
2 Parent(s): 33bc29d 8935117

Merge branch 'lstm' of hf.co:spaces/smartbuildings/smart-buildings into lstm

Browse files
Files changed (2) hide show
  1. physLSTM/full_lstm.ipynb +143 -560
  2. physLSTM/lstm_vav.ipynb +1186 -0
physLSTM/full_lstm.ipynb CHANGED
@@ -2,7 +2,7 @@
2
  "cells": [
3
  {
4
  "cell_type": "code",
5
- "execution_count": 56,
6
  "metadata": {},
7
  "outputs": [],
8
  "source": [
@@ -22,7 +22,7 @@
22
  },
23
  {
24
  "cell_type": "code",
25
- "execution_count": 57,
26
  "metadata": {},
27
  "outputs": [
28
  {
@@ -31,7 +31,7 @@
31
  "[]"
32
  ]
33
  },
34
- "execution_count": 57,
35
  "metadata": {},
36
  "output_type": "execute_result"
37
  }
@@ -43,423 +43,9 @@
43
  },
44
  {
45
  "cell_type": "code",
46
- "execution_count": 58,
47
  "metadata": {},
48
- "outputs": [
49
- {
50
- "data": {
51
- "text/html": [
52
- "<div>\n",
53
- "<style scoped>\n",
54
- " .dataframe tbody tr th:only-of-type {\n",
55
- " vertical-align: middle;\n",
56
- " }\n",
57
- "\n",
58
- " .dataframe tbody tr th {\n",
59
- " vertical-align: top;\n",
60
- " }\n",
61
- "\n",
62
- " .dataframe thead th {\n",
63
- " text-align: right;\n",
64
- " }\n",
65
- "</style>\n",
66
- "<table border=\"1\" class=\"dataframe\">\n",
67
- " <thead>\n",
68
- " <tr style=\"text-align: right;\">\n",
69
- " <th></th>\n",
70
- " <th>date</th>\n",
71
- " <th>zone_047_hw_valve</th>\n",
72
- " <th>rtu_004_sat_sp_tn</th>\n",
73
- " <th>zone_047_temp</th>\n",
74
- " <th>zone_047_fan_spd</th>\n",
75
- " <th>rtu_004_fltrd_sa_flow_tn</th>\n",
76
- " <th>rtu_004_sa_temp</th>\n",
77
- " <th>rtu_004_pa_static_stpt_tn</th>\n",
78
- " <th>rtu_004_oa_flow_tn</th>\n",
79
- " <th>rtu_004_oadmpr_pct</th>\n",
80
- " <th>...</th>\n",
81
- " <th>zone_047_heating_sp</th>\n",
82
- " <th>Unnamed: 47_y</th>\n",
83
- " <th>hvac_S</th>\n",
84
- " <th>hp_hws_temp</th>\n",
85
- " <th>aru_001_cwr_temp</th>\n",
86
- " <th>aru_001_cws_fr_gpm</th>\n",
87
- " <th>aru_001_cws_temp</th>\n",
88
- " <th>aru_001_hwr_temp</th>\n",
89
- " <th>aru_001_hws_fr_gpm</th>\n",
90
- " <th>aru_001_hws_temp</th>\n",
91
- " </tr>\n",
92
- " </thead>\n",
93
- " <tbody>\n",
94
- " <tr>\n",
95
- " <th>0</th>\n",
96
- " <td>2018-01-01 00:00:00</td>\n",
97
- " <td>100.0</td>\n",
98
- " <td>69.0</td>\n",
99
- " <td>67.5</td>\n",
100
- " <td>20.0</td>\n",
101
- " <td>9265.604</td>\n",
102
- " <td>66.1</td>\n",
103
- " <td>0.06</td>\n",
104
- " <td>0.000000</td>\n",
105
- " <td>28.0</td>\n",
106
- " <td>...</td>\n",
107
- " <td>NaN</td>\n",
108
- " <td>NaN</td>\n",
109
- " <td>NaN</td>\n",
110
- " <td>75.3</td>\n",
111
- " <td>NaN</td>\n",
112
- " <td>NaN</td>\n",
113
- " <td>NaN</td>\n",
114
- " <td>NaN</td>\n",
115
- " <td>NaN</td>\n",
116
- " <td>NaN</td>\n",
117
- " </tr>\n",
118
- " <tr>\n",
119
- " <th>1</th>\n",
120
- " <td>2018-01-01 00:01:00</td>\n",
121
- " <td>100.0</td>\n",
122
- " <td>69.0</td>\n",
123
- " <td>67.5</td>\n",
124
- " <td>20.0</td>\n",
125
- " <td>9265.604</td>\n",
126
- " <td>66.0</td>\n",
127
- " <td>0.06</td>\n",
128
- " <td>6572.099162</td>\n",
129
- " <td>28.0</td>\n",
130
- " <td>...</td>\n",
131
- " <td>NaN</td>\n",
132
- " <td>NaN</td>\n",
133
- " <td>NaN</td>\n",
134
- " <td>75.3</td>\n",
135
- " <td>NaN</td>\n",
136
- " <td>NaN</td>\n",
137
- " <td>NaN</td>\n",
138
- " <td>NaN</td>\n",
139
- " <td>NaN</td>\n",
140
- " <td>NaN</td>\n",
141
- " </tr>\n",
142
- " <tr>\n",
143
- " <th>2</th>\n",
144
- " <td>2018-01-01 00:02:00</td>\n",
145
- " <td>100.0</td>\n",
146
- " <td>69.0</td>\n",
147
- " <td>67.5</td>\n",
148
- " <td>20.0</td>\n",
149
- " <td>9708.240</td>\n",
150
- " <td>66.1</td>\n",
151
- " <td>0.06</td>\n",
152
- " <td>7628.832542</td>\n",
153
- " <td>28.0</td>\n",
154
- " <td>...</td>\n",
155
- " <td>NaN</td>\n",
156
- " <td>NaN</td>\n",
157
- " <td>NaN</td>\n",
158
- " <td>75.3</td>\n",
159
- " <td>NaN</td>\n",
160
- " <td>NaN</td>\n",
161
- " <td>NaN</td>\n",
162
- " <td>NaN</td>\n",
163
- " <td>NaN</td>\n",
164
- " <td>NaN</td>\n",
165
- " </tr>\n",
166
- " <tr>\n",
167
- " <th>3</th>\n",
168
- " <td>2018-01-01 00:03:00</td>\n",
169
- " <td>100.0</td>\n",
170
- " <td>69.0</td>\n",
171
- " <td>67.5</td>\n",
172
- " <td>20.0</td>\n",
173
- " <td>9611.638</td>\n",
174
- " <td>66.1</td>\n",
175
- " <td>0.06</td>\n",
176
- " <td>7710.294617</td>\n",
177
- " <td>28.0</td>\n",
178
- " <td>...</td>\n",
179
- " <td>NaN</td>\n",
180
- " <td>NaN</td>\n",
181
- " <td>NaN</td>\n",
182
- " <td>75.3</td>\n",
183
- " <td>NaN</td>\n",
184
- " <td>NaN</td>\n",
185
- " <td>NaN</td>\n",
186
- " <td>NaN</td>\n",
187
- " <td>NaN</td>\n",
188
- " <td>NaN</td>\n",
189
- " </tr>\n",
190
- " <tr>\n",
191
- " <th>4</th>\n",
192
- " <td>2018-01-01 00:04:00</td>\n",
193
- " <td>100.0</td>\n",
194
- " <td>69.0</td>\n",
195
- " <td>67.5</td>\n",
196
- " <td>20.0</td>\n",
197
- " <td>9215.110</td>\n",
198
- " <td>66.0</td>\n",
199
- " <td>0.06</td>\n",
200
- " <td>7139.184090</td>\n",
201
- " <td>28.0</td>\n",
202
- " <td>...</td>\n",
203
- " <td>NaN</td>\n",
204
- " <td>NaN</td>\n",
205
- " <td>NaN</td>\n",
206
- " <td>75.3</td>\n",
207
- " <td>NaN</td>\n",
208
- " <td>NaN</td>\n",
209
- " <td>NaN</td>\n",
210
- " <td>NaN</td>\n",
211
- " <td>NaN</td>\n",
212
- " <td>NaN</td>\n",
213
- " </tr>\n",
214
- " <tr>\n",
215
- " <th>...</th>\n",
216
- " <td>...</td>\n",
217
- " <td>...</td>\n",
218
- " <td>...</td>\n",
219
- " <td>...</td>\n",
220
- " <td>...</td>\n",
221
- " <td>...</td>\n",
222
- " <td>...</td>\n",
223
- " <td>...</td>\n",
224
- " <td>...</td>\n",
225
- " <td>...</td>\n",
226
- " <td>...</td>\n",
227
- " <td>...</td>\n",
228
- " <td>...</td>\n",
229
- " <td>...</td>\n",
230
- " <td>...</td>\n",
231
- " <td>...</td>\n",
232
- " <td>...</td>\n",
233
- " <td>...</td>\n",
234
- " <td>...</td>\n",
235
- " <td>...</td>\n",
236
- " <td>...</td>\n",
237
- " </tr>\n",
238
- " <tr>\n",
239
- " <th>2072149</th>\n",
240
- " <td>2020-12-31 23:58:00</td>\n",
241
- " <td>100.0</td>\n",
242
- " <td>68.0</td>\n",
243
- " <td>63.2</td>\n",
244
- " <td>20.0</td>\n",
245
- " <td>18884.834</td>\n",
246
- " <td>64.4</td>\n",
247
- " <td>0.06</td>\n",
248
- " <td>2938.320000</td>\n",
249
- " <td>23.4</td>\n",
250
- " <td>...</td>\n",
251
- " <td>71.0</td>\n",
252
- " <td>69.0</td>\n",
253
- " <td>23.145000</td>\n",
254
- " <td>123.8</td>\n",
255
- " <td>56.25</td>\n",
256
- " <td>54.71</td>\n",
257
- " <td>56.4</td>\n",
258
- " <td>123.42</td>\n",
259
- " <td>61.6</td>\n",
260
- " <td>122.36</td>\n",
261
- " </tr>\n",
262
- " <tr>\n",
263
- " <th>2072150</th>\n",
264
- " <td>2020-12-31 23:58:00</td>\n",
265
- " <td>100.0</td>\n",
266
- " <td>68.0</td>\n",
267
- " <td>63.2</td>\n",
268
- " <td>20.0</td>\n",
269
- " <td>18884.834</td>\n",
270
- " <td>64.4</td>\n",
271
- " <td>0.06</td>\n",
272
- " <td>2938.320000</td>\n",
273
- " <td>23.4</td>\n",
274
- " <td>...</td>\n",
275
- " <td>71.0</td>\n",
276
- " <td>69.0</td>\n",
277
- " <td>23.145000</td>\n",
278
- " <td>123.8</td>\n",
279
- " <td>56.25</td>\n",
280
- " <td>54.71</td>\n",
281
- " <td>56.4</td>\n",
282
- " <td>123.42</td>\n",
283
- " <td>61.6</td>\n",
284
- " <td>122.36</td>\n",
285
- " </tr>\n",
286
- " <tr>\n",
287
- " <th>2072151</th>\n",
288
- " <td>2020-12-31 23:59:00</td>\n",
289
- " <td>100.0</td>\n",
290
- " <td>68.0</td>\n",
291
- " <td>63.2</td>\n",
292
- " <td>20.0</td>\n",
293
- " <td>19345.508</td>\n",
294
- " <td>64.3</td>\n",
295
- " <td>0.06</td>\n",
296
- " <td>3154.390000</td>\n",
297
- " <td>23.4</td>\n",
298
- " <td>...</td>\n",
299
- " <td>71.0</td>\n",
300
- " <td>69.0</td>\n",
301
- " <td>23.145000</td>\n",
302
- " <td>123.8</td>\n",
303
- " <td>56.25</td>\n",
304
- " <td>54.71</td>\n",
305
- " <td>56.4</td>\n",
306
- " <td>123.42</td>\n",
307
- " <td>61.6</td>\n",
308
- " <td>122.36</td>\n",
309
- " </tr>\n",
310
- " <tr>\n",
311
- " <th>2072152</th>\n",
312
- " <td>2020-12-31 23:59:00</td>\n",
313
- " <td>100.0</td>\n",
314
- " <td>68.0</td>\n",
315
- " <td>63.2</td>\n",
316
- " <td>20.0</td>\n",
317
- " <td>19345.508</td>\n",
318
- " <td>64.3</td>\n",
319
- " <td>0.06</td>\n",
320
- " <td>3154.390000</td>\n",
321
- " <td>23.4</td>\n",
322
- " <td>...</td>\n",
323
- " <td>71.0</td>\n",
324
- " <td>69.0</td>\n",
325
- " <td>23.145000</td>\n",
326
- " <td>123.8</td>\n",
327
- " <td>56.25</td>\n",
328
- " <td>54.71</td>\n",
329
- " <td>56.4</td>\n",
330
- " <td>123.42</td>\n",
331
- " <td>61.6</td>\n",
332
- " <td>122.36</td>\n",
333
- " </tr>\n",
334
- " <tr>\n",
335
- " <th>2072153</th>\n",
336
- " <td>2021-01-01 00:00:00</td>\n",
337
- " <td>100.0</td>\n",
338
- " <td>68.0</td>\n",
339
- " <td>63.2</td>\n",
340
- " <td>20.0</td>\n",
341
- " <td>18650.232</td>\n",
342
- " <td>64.1</td>\n",
343
- " <td>0.06</td>\n",
344
- " <td>3076.270000</td>\n",
345
- " <td>22.9</td>\n",
346
- " <td>...</td>\n",
347
- " <td>71.0</td>\n",
348
- " <td>69.0</td>\n",
349
- " <td>23.788947</td>\n",
350
- " <td>123.8</td>\n",
351
- " <td>56.25</td>\n",
352
- " <td>54.71</td>\n",
353
- " <td>56.4</td>\n",
354
- " <td>123.42</td>\n",
355
- " <td>61.6</td>\n",
356
- " <td>122.36</td>\n",
357
- " </tr>\n",
358
- " </tbody>\n",
359
- "</table>\n",
360
- "<p>2072154 rows × 30 columns</p>\n",
361
- "</div>"
362
- ],
363
- "text/plain": [
364
- " date zone_047_hw_valve rtu_004_sat_sp_tn \\\n",
365
- "0 2018-01-01 00:00:00 100.0 69.0 \n",
366
- "1 2018-01-01 00:01:00 100.0 69.0 \n",
367
- "2 2018-01-01 00:02:00 100.0 69.0 \n",
368
- "3 2018-01-01 00:03:00 100.0 69.0 \n",
369
- "4 2018-01-01 00:04:00 100.0 69.0 \n",
370
- "... ... ... ... \n",
371
- "2072149 2020-12-31 23:58:00 100.0 68.0 \n",
372
- "2072150 2020-12-31 23:58:00 100.0 68.0 \n",
373
- "2072151 2020-12-31 23:59:00 100.0 68.0 \n",
374
- "2072152 2020-12-31 23:59:00 100.0 68.0 \n",
375
- "2072153 2021-01-01 00:00:00 100.0 68.0 \n",
376
- "\n",
377
- " zone_047_temp zone_047_fan_spd rtu_004_fltrd_sa_flow_tn \\\n",
378
- "0 67.5 20.0 9265.604 \n",
379
- "1 67.5 20.0 9265.604 \n",
380
- "2 67.5 20.0 9708.240 \n",
381
- "3 67.5 20.0 9611.638 \n",
382
- "4 67.5 20.0 9215.110 \n",
383
- "... ... ... ... \n",
384
- "2072149 63.2 20.0 18884.834 \n",
385
- "2072150 63.2 20.0 18884.834 \n",
386
- "2072151 63.2 20.0 19345.508 \n",
387
- "2072152 63.2 20.0 19345.508 \n",
388
- "2072153 63.2 20.0 18650.232 \n",
389
- "\n",
390
- " rtu_004_sa_temp rtu_004_pa_static_stpt_tn rtu_004_oa_flow_tn \\\n",
391
- "0 66.1 0.06 0.000000 \n",
392
- "1 66.0 0.06 6572.099162 \n",
393
- "2 66.1 0.06 7628.832542 \n",
394
- "3 66.1 0.06 7710.294617 \n",
395
- "4 66.0 0.06 7139.184090 \n",
396
- "... ... ... ... \n",
397
- "2072149 64.4 0.06 2938.320000 \n",
398
- "2072150 64.4 0.06 2938.320000 \n",
399
- "2072151 64.3 0.06 3154.390000 \n",
400
- "2072152 64.3 0.06 3154.390000 \n",
401
- "2072153 64.1 0.06 3076.270000 \n",
402
- "\n",
403
- " rtu_004_oadmpr_pct ... zone_047_heating_sp Unnamed: 47_y \\\n",
404
- "0 28.0 ... NaN NaN \n",
405
- "1 28.0 ... NaN NaN \n",
406
- "2 28.0 ... NaN NaN \n",
407
- "3 28.0 ... NaN NaN \n",
408
- "4 28.0 ... NaN NaN \n",
409
- "... ... ... ... ... \n",
410
- "2072149 23.4 ... 71.0 69.0 \n",
411
- "2072150 23.4 ... 71.0 69.0 \n",
412
- "2072151 23.4 ... 71.0 69.0 \n",
413
- "2072152 23.4 ... 71.0 69.0 \n",
414
- "2072153 22.9 ... 71.0 69.0 \n",
415
- "\n",
416
- " hvac_S hp_hws_temp aru_001_cwr_temp aru_001_cws_fr_gpm \\\n",
417
- "0 NaN 75.3 NaN NaN \n",
418
- "1 NaN 75.3 NaN NaN \n",
419
- "2 NaN 75.3 NaN NaN \n",
420
- "3 NaN 75.3 NaN NaN \n",
421
- "4 NaN 75.3 NaN NaN \n",
422
- "... ... ... ... ... \n",
423
- "2072149 23.145000 123.8 56.25 54.71 \n",
424
- "2072150 23.145000 123.8 56.25 54.71 \n",
425
- "2072151 23.145000 123.8 56.25 54.71 \n",
426
- "2072152 23.145000 123.8 56.25 54.71 \n",
427
- "2072153 23.788947 123.8 56.25 54.71 \n",
428
- "\n",
429
- " aru_001_cws_temp aru_001_hwr_temp aru_001_hws_fr_gpm \\\n",
430
- "0 NaN NaN NaN \n",
431
- "1 NaN NaN NaN \n",
432
- "2 NaN NaN NaN \n",
433
- "3 NaN NaN NaN \n",
434
- "4 NaN NaN NaN \n",
435
- "... ... ... ... \n",
436
- "2072149 56.4 123.42 61.6 \n",
437
- "2072150 56.4 123.42 61.6 \n",
438
- "2072151 56.4 123.42 61.6 \n",
439
- "2072152 56.4 123.42 61.6 \n",
440
- "2072153 56.4 123.42 61.6 \n",
441
- "\n",
442
- " aru_001_hws_temp \n",
443
- "0 NaN \n",
444
- "1 NaN \n",
445
- "2 NaN \n",
446
- "3 NaN \n",
447
- "4 NaN \n",
448
- "... ... \n",
449
- "2072149 122.36 \n",
450
- "2072150 122.36 \n",
451
- "2072151 122.36 \n",
452
- "2072152 122.36 \n",
453
- "2072153 122.36 \n",
454
- "\n",
455
- "[2072154 rows x 30 columns]"
456
- ]
457
- },
458
- "execution_count": 58,
459
- "metadata": {},
460
- "output_type": "execute_result"
461
- }
462
- ],
463
  "source": [
464
  "merged = pd.read_csv(r'../data/long_merge.csv')\n",
465
  "\n",
@@ -484,7 +70,7 @@
484
  },
485
  {
486
  "cell_type": "code",
487
- "execution_count": 59,
488
  "metadata": {},
489
  "outputs": [
490
  {
@@ -621,7 +207,7 @@
621
  "[2 rows x 23 columns]"
622
  ]
623
  },
624
- "execution_count": 59,
625
  "metadata": {},
626
  "output_type": "execute_result"
627
  }
@@ -644,7 +230,7 @@
644
  },
645
  {
646
  "cell_type": "code",
647
- "execution_count": 60,
648
  "metadata": {},
649
  "outputs": [],
650
  "source": [
@@ -659,7 +245,7 @@
659
  },
660
  {
661
  "cell_type": "code",
662
- "execution_count": 61,
663
  "metadata": {},
664
  "outputs": [],
665
  "source": [
@@ -681,7 +267,7 @@
681
  },
682
  {
683
  "cell_type": "code",
684
- "execution_count": 62,
685
  "metadata": {},
686
  "outputs": [
687
  {
@@ -690,7 +276,7 @@
690
  "[]"
691
  ]
692
  },
693
- "execution_count": 62,
694
  "metadata": {},
695
  "output_type": "execute_result"
696
  }
@@ -701,9 +287,9 @@
701
  "# traindataset_df = df_filtered[ (df_filtered.date.dt.date >date(2019, 11, 8))]\n",
702
  "\n",
703
  "traindataset_df = df_filtered[ (df_filtered.date.dt.date <date(2020, 3, 11))]\n",
704
- "testdataset = testdataset_df.drop(columns=[\"date\"]).rolling(window = 10, step=5, min_periods=1).mean().values\n",
705
  "\n",
706
- "traindataset = traindataset_df.drop(columns=[\"date\"]).rolling(window = 10, step=5, min_periods=1).mean().values\n",
707
  "\n",
708
  "columns_with_na = traindataset_df.columns[traindataset_df.isna().any()].tolist()\n",
709
  "columns_with_na"
@@ -711,7 +297,27 @@
711
  },
712
  {
713
  "cell_type": "code",
714
- "execution_count": 63,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
715
  "metadata": {},
716
  "outputs": [
717
  {
@@ -720,18 +326,18 @@
720
  "(1157787, 909910)"
721
  ]
722
  },
723
- "execution_count": 63,
724
  "metadata": {},
725
  "output_type": "execute_result"
726
  }
727
  ],
728
  "source": [
729
- "len(traindataset_df), len(testdataset_df)"
730
  ]
731
  },
732
  {
733
  "cell_type": "code",
734
- "execution_count": 64,
735
  "metadata": {},
736
  "outputs": [],
737
  "source": [
@@ -745,9 +351,102 @@
745
  },
746
  {
747
  "cell_type": "code",
748
- "execution_count": 65,
749
  "metadata": {},
750
- "outputs": [],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
751
  "source": [
752
  "train,test = traindataset,testdataset\n",
753
  "\n",
@@ -778,7 +477,7 @@
778
  "\n",
779
  "checkpoint_path = \"lstm_smooth_01.tf\"\n",
780
  "checkpoint_callback = ModelCheckpoint(filepath=checkpoint_path, monitor='val_loss', verbose=1, save_best_only=True, mode='min')\n",
781
- "model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=5, batch_size=64, verbose=1, callbacks=[checkpoint_callback])"
782
  ]
783
  },
784
  {
@@ -789,10 +488,10 @@
789
  {
790
  "data": {
791
  "text/plain": [
792
- "<tensorflow.python.checkpoint.checkpoint.CheckpointLoadStatus at 0x1b2861bd190>"
793
  ]
794
  },
795
- "execution_count": 11,
796
  "metadata": {},
797
  "output_type": "execute_result"
798
  }
@@ -810,7 +509,7 @@
810
  "name": "stdout",
811
  "output_type": "stream",
812
  "text": [
813
- "5686/5686 [==============================] - 27s 5ms/step\n"
814
  ]
815
  }
816
  ],
@@ -820,7 +519,7 @@
820
  },
821
  {
822
  "cell_type": "code",
823
- "execution_count": null,
824
  "metadata": {},
825
  "outputs": [],
826
  "source": [
@@ -841,7 +540,7 @@
841
  },
842
  {
843
  "cell_type": "code",
844
- "execution_count": null,
845
  "metadata": {},
846
  "outputs": [],
847
  "source": [
@@ -878,7 +577,7 @@
878
  },
879
  {
880
  "cell_type": "code",
881
- "execution_count": null,
882
  "metadata": {},
883
  "outputs": [],
884
  "source": [
@@ -917,7 +616,7 @@
917
  },
918
  {
919
  "cell_type": "code",
920
- "execution_count": null,
921
  "metadata": {},
922
  "outputs": [],
923
  "source": [
@@ -938,7 +637,7 @@
938
  },
939
  {
940
  "cell_type": "code",
941
- "execution_count": null,
942
  "metadata": {},
943
  "outputs": [],
944
  "source": [
@@ -1006,123 +705,7 @@
1006
  },
1007
  {
1008
  "cell_type": "code",
1009
- "execution_count": null,
1010
- "metadata": {},
1011
- "outputs": [
1012
- {
1013
- "data": {
1014
- "text/plain": [
1015
- "(181982, 15)"
1016
- ]
1017
- },
1018
- "execution_count": 26,
1019
- "metadata": {},
1020
- "output_type": "execute_result"
1021
- }
1022
- ],
1023
- "source": [
1024
- "testdataset.shape"
1025
- ]
1026
- },
1027
- {
1028
- "cell_type": "code",
1029
- "execution_count": null,
1030
- "metadata": {},
1031
- "outputs": [
1032
- {
1033
- "data": {
1034
- "text/plain": [
1035
- "(181951, 8)"
1036
- ]
1037
- },
1038
- "execution_count": 28,
1039
- "metadata": {},
1040
- "output_type": "execute_result"
1041
- }
1042
- ],
1043
- "source": [
1044
- "test_predict1.shape"
1045
- ]
1046
- },
1047
- {
1048
- "cell_type": "code",
1049
- "execution_count": null,
1050
- "metadata": {},
1051
- "outputs": [
1052
- {
1053
- "data": {
1054
- "text/plain": [
1055
- "array([108.04575472, 65.85715493, 47.79928153, 71.09534962,\n",
1056
- " 56.33539828, 67.06136834, 73.87258151, 51.46057509,\n",
1057
- " 32.91318188, 28.12291834, 13.58804695, 13.24250204,\n",
1058
- " 6.3366788 , 66.41283778, 176.8329019 ])"
1059
- ]
1060
- },
1061
- "execution_count": 30,
1062
- "metadata": {},
1063
- "output_type": "execute_result"
1064
- }
1065
- ],
1066
- "source": [
1067
- "scaler.mean_"
1068
- ]
1069
- },
1070
- {
1071
- "cell_type": "code",
1072
- "execution_count": null,
1073
- "metadata": {},
1074
- "outputs": [
1075
- {
1076
- "data": {
1077
- "text/plain": [
1078
- "array([2.23555351e+02, 4.88454343e+00, 6.76207201e+02, 3.86856317e+00,\n",
1079
- " 6.72235289e+01, 7.04553897e+00, 2.03829988e+02, 1.46671335e+02,\n",
1080
- " 1.53229114e+02, 1.01090815e+02, 2.37177860e+01, 1.97707428e+01,\n",
1081
- " 2.76565556e+01, 4.60824153e+02, 6.83930692e+04])"
1082
- ]
1083
- },
1084
- "execution_count": 31,
1085
- "metadata": {},
1086
- "output_type": "execute_result"
1087
- }
1088
- ],
1089
- "source": [
1090
- "scaler.var_"
1091
- ]
1092
- },
1093
- {
1094
- "cell_type": "code",
1095
- "execution_count": null,
1096
- "metadata": {},
1097
- "outputs": [
1098
- {
1099
- "data": {
1100
- "text/plain": [
1101
- "array([[109.83607997, 65.7232677 , 102.42839746, ..., 67.14066092,\n",
1102
- " 90.56450819, 66.22438437],\n",
1103
- " [100.28441846, 66.40819637, 123.52383974, ..., 68.39884677,\n",
1104
- " 71.74945776, 60.3140524 ],\n",
1105
- " [100.83776313, 65.46071865, -55.82973994, ..., 66.55045523,\n",
1106
- " 64.49064254, 66.48224704],\n",
1107
- " ...,\n",
1108
- " [ 70.86386298, 65.98717901, 118.99624806, ..., 67.35991191,\n",
1109
- " 43.36234531, 29.05084393],\n",
1110
- " [ 71.26526339, 65.9891675 , 118.33246354, ..., 67.25223838,\n",
1111
- " 50.88386299, 46.49937637],\n",
1112
- " [ 71.28495765, 65.85019898, 114.35237621, ..., 67.29575831,\n",
1113
- " 40.09704965, 20.1328048 ]])"
1114
- ]
1115
- },
1116
- "execution_count": 34,
1117
- "metadata": {},
1118
- "output_type": "execute_result"
1119
- }
1120
- ],
1121
- "source": []
1122
- },
1123
- {
1124
- "cell_type": "code",
1125
- "execution_count": null,
1126
  "metadata": {},
1127
  "outputs": [],
1128
  "source": [
@@ -1131,7 +714,7 @@
1131
  "import matplotlib.pyplot as plt\n",
1132
  "# Generating random data for demonstration\n",
1133
  "np.random.seed(0)\n",
1134
- "X = (test_predict1-y_test) * scaler.var_[0:8] + scaler.mean_[0:8]\n",
1135
  "k = 6\n",
1136
  "\n",
1137
  "kmeans = KMeans(n_clusters=k)\n",
 
2
  "cells": [
3
  {
4
  "cell_type": "code",
5
+ "execution_count": 1,
6
  "metadata": {},
7
  "outputs": [],
8
  "source": [
 
22
  },
23
  {
24
  "cell_type": "code",
25
+ "execution_count": 2,
26
  "metadata": {},
27
  "outputs": [
28
  {
 
31
  "[]"
32
  ]
33
  },
34
+ "execution_count": 2,
35
  "metadata": {},
36
  "output_type": "execute_result"
37
  }
 
43
  },
44
  {
45
  "cell_type": "code",
46
+ "execution_count": 4,
47
  "metadata": {},
48
+ "outputs": [],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
49
  "source": [
50
  "merged = pd.read_csv(r'../data/long_merge.csv')\n",
51
  "\n",
 
70
  },
71
  {
72
  "cell_type": "code",
73
+ "execution_count": null,
74
  "metadata": {},
75
  "outputs": [
76
  {
 
207
  "[2 rows x 23 columns]"
208
  ]
209
  },
210
+ "execution_count": 81,
211
  "metadata": {},
212
  "output_type": "execute_result"
213
  }
 
230
  },
231
  {
232
  "cell_type": "code",
233
+ "execution_count": null,
234
  "metadata": {},
235
  "outputs": [],
236
  "source": [
 
245
  },
246
  {
247
  "cell_type": "code",
248
+ "execution_count": null,
249
  "metadata": {},
250
  "outputs": [],
251
  "source": [
 
267
  },
268
  {
269
  "cell_type": "code",
270
+ "execution_count": null,
271
  "metadata": {},
272
  "outputs": [
273
  {
 
276
  "[]"
277
  ]
278
  },
279
+ "execution_count": 98,
280
  "metadata": {},
281
  "output_type": "execute_result"
282
  }
 
287
  "# traindataset_df = df_filtered[ (df_filtered.date.dt.date >date(2019, 11, 8))]\n",
288
  "\n",
289
  "traindataset_df = df_filtered[ (df_filtered.date.dt.date <date(2020, 3, 11))]\n",
290
+ "testdataset = testdataset_df.drop(columns=[\"date\"]).values\n",
291
  "\n",
292
+ "traindataset = traindataset_df.drop(columns=[\"date\"]).values\n",
293
  "\n",
294
  "columns_with_na = traindataset_df.columns[traindataset_df.isna().any()].tolist()\n",
295
  "columns_with_na"
 
297
  },
298
  {
299
  "cell_type": "code",
300
+ "execution_count": null,
301
+ "metadata": {},
302
+ "outputs": [
303
+ {
304
+ "data": {
305
+ "text/plain": [
306
+ "2072154"
307
+ ]
308
+ },
309
+ "execution_count": 110,
310
+ "metadata": {},
311
+ "output_type": "execute_result"
312
+ }
313
+ ],
314
+ "source": [
315
+ "len(merged)"
316
+ ]
317
+ },
318
+ {
319
+ "cell_type": "code",
320
+ "execution_count": null,
321
  "metadata": {},
322
  "outputs": [
323
  {
 
326
  "(1157787, 909910)"
327
  ]
328
  },
329
+ "execution_count": 99,
330
  "metadata": {},
331
  "output_type": "execute_result"
332
  }
333
  ],
334
  "source": [
335
+ "len(traindataset), len(testdataset)"
336
  ]
337
  },
338
  {
339
  "cell_type": "code",
340
+ "execution_count": null,
341
  "metadata": {},
342
  "outputs": [],
343
  "source": [
 
351
  },
352
  {
353
  "cell_type": "code",
354
+ "execution_count": null,
355
  "metadata": {},
356
+ "outputs": [
357
+ {
358
+ "name": "stdout",
359
+ "output_type": "stream",
360
+ "text": [
361
+ "Epoch 1/5\n",
362
+ "9045/9045 [==============================] - ETA: 0s - loss: 0.0405\n",
363
+ "Epoch 1: val_loss improved from inf to 0.03129, saving model to lstm_smooth_01.tf\n",
364
+ "INFO:tensorflow:Assets written to: lstm_smooth_01.tf\\assets\n"
365
+ ]
366
+ },
367
+ {
368
+ "name": "stderr",
369
+ "output_type": "stream",
370
+ "text": [
371
+ "INFO:tensorflow:Assets written to: lstm_smooth_01.tf\\assets\n"
372
+ ]
373
+ },
374
+ {
375
+ "name": "stdout",
376
+ "output_type": "stream",
377
+ "text": [
378
+ "9045/9045 [==============================] - 346s 38ms/step - loss: 0.0405 - val_loss: 0.0313\n",
379
+ "Epoch 2/5\n",
380
+ "9045/9045 [==============================] - ETA: 0s - loss: 0.0228\n",
381
+ "Epoch 2: val_loss improved from 0.03129 to 0.02697, saving model to lstm_smooth_01.tf\n",
382
+ "INFO:tensorflow:Assets written to: lstm_smooth_01.tf\\assets\n"
383
+ ]
384
+ },
385
+ {
386
+ "name": "stderr",
387
+ "output_type": "stream",
388
+ "text": [
389
+ "INFO:tensorflow:Assets written to: lstm_smooth_01.tf\\assets\n"
390
+ ]
391
+ },
392
+ {
393
+ "name": "stdout",
394
+ "output_type": "stream",
395
+ "text": [
396
+ "9045/9045 [==============================] - 500s 55ms/step - loss: 0.0228 - val_loss: 0.0270\n",
397
+ "Epoch 3/5\n",
398
+ "9044/9045 [============================>.] - ETA: 0s - loss: 0.0211\n",
399
+ "Epoch 3: val_loss improved from 0.02697 to 0.02597, saving model to lstm_smooth_01.tf\n",
400
+ "INFO:tensorflow:Assets written to: lstm_smooth_01.tf\\assets\n"
401
+ ]
402
+ },
403
+ {
404
+ "name": "stderr",
405
+ "output_type": "stream",
406
+ "text": [
407
+ "INFO:tensorflow:Assets written to: lstm_smooth_01.tf\\assets\n"
408
+ ]
409
+ },
410
+ {
411
+ "name": "stdout",
412
+ "output_type": "stream",
413
+ "text": [
414
+ "9045/9045 [==============================] - 389s 43ms/step - loss: 0.0211 - val_loss: 0.0260\n",
415
+ "Epoch 4/5\n",
416
+ "9044/9045 [============================>.] - ETA: 0s - loss: 0.0203\n",
417
+ "Epoch 4: val_loss improved from 0.02597 to 0.02452, saving model to lstm_smooth_01.tf\n",
418
+ "INFO:tensorflow:Assets written to: lstm_smooth_01.tf\\assets\n"
419
+ ]
420
+ },
421
+ {
422
+ "name": "stderr",
423
+ "output_type": "stream",
424
+ "text": [
425
+ "INFO:tensorflow:Assets written to: lstm_smooth_01.tf\\assets\n"
426
+ ]
427
+ },
428
+ {
429
+ "name": "stdout",
430
+ "output_type": "stream",
431
+ "text": [
432
+ "9045/9045 [==============================] - 433s 48ms/step - loss: 0.0203 - val_loss: 0.0245\n",
433
+ "Epoch 5/5\n",
434
+ "9044/9045 [============================>.] - ETA: 0s - loss: 0.0198\n",
435
+ "Epoch 5: val_loss did not improve from 0.02452\n",
436
+ "9045/9045 [==============================] - 420s 46ms/step - loss: 0.0198 - val_loss: 0.0251\n"
437
+ ]
438
+ },
439
+ {
440
+ "data": {
441
+ "text/plain": [
442
+ "<keras.src.callbacks.History at 0x1b4590f0250>"
443
+ ]
444
+ },
445
+ "execution_count": 101,
446
+ "metadata": {},
447
+ "output_type": "execute_result"
448
+ }
449
+ ],
450
  "source": [
451
  "train,test = traindataset,testdataset\n",
452
  "\n",
 
477
  "\n",
478
  "checkpoint_path = \"lstm_smooth_01.tf\"\n",
479
  "checkpoint_callback = ModelCheckpoint(filepath=checkpoint_path, monitor='val_loss', verbose=1, save_best_only=True, mode='min')\n",
480
+ "# model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=5, batch_size=128, verbose=1, callbacks=[checkpoint_callback])"
481
  ]
482
  },
483
  {
 
488
  {
489
  "data": {
490
  "text/plain": [
491
+ "<tensorflow.python.checkpoint.checkpoint.CheckpointLoadStatus at 0x1b41f862c10>"
492
  ]
493
  },
494
+ "execution_count": 102,
495
  "metadata": {},
496
  "output_type": "execute_result"
497
  }
 
509
  "name": "stdout",
510
  "output_type": "stream",
511
  "text": [
512
+ "28434/28434 [==============================] - 168s 6ms/step\n"
513
  ]
514
  }
515
  ],
 
519
  },
520
  {
521
  "cell_type": "code",
522
+ "execution_count": 109,
523
  "metadata": {},
524
  "outputs": [],
525
  "source": [
 
540
  },
541
  {
542
  "cell_type": "code",
543
+ "execution_count": 105,
544
  "metadata": {},
545
  "outputs": [],
546
  "source": [
 
577
  },
578
  {
579
  "cell_type": "code",
580
+ "execution_count": 106,
581
  "metadata": {},
582
  "outputs": [],
583
  "source": [
 
616
  },
617
  {
618
  "cell_type": "code",
619
+ "execution_count": 111,
620
  "metadata": {},
621
  "outputs": [],
622
  "source": [
 
637
  },
638
  {
639
  "cell_type": "code",
640
+ "execution_count": 117,
641
  "metadata": {},
642
  "outputs": [],
643
  "source": [
 
705
  },
706
  {
707
  "cell_type": "code",
708
+ "execution_count": 116,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
709
  "metadata": {},
710
  "outputs": [],
711
  "source": [
 
714
  "import matplotlib.pyplot as plt\n",
715
  "# Generating random data for demonstration\n",
716
  "np.random.seed(0)\n",
717
+ "X = (test_predict1 * scaler.var_[0:8] + scaler.mean_[0:8]) - (y_test * scaler.var_[0:8] + scaler.mean_[0:8])\n",
718
  "k = 6\n",
719
  "\n",
720
  "kmeans = KMeans(n_clusters=k)\n",
physLSTM/lstm_vav.ipynb ADDED
@@ -0,0 +1,1186 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 1,
6
+ "metadata": {},
7
+ "outputs": [],
8
+ "source": [
9
+ "import pandas as pd \n",
10
+ "from datetime import datetime \n",
11
+ "from datetime import date\n",
12
+ "import matplotlib.pyplot as plt\n",
13
+ "import numpy as np\n",
14
+ "import pandas as pd\n",
15
+ "from keras.models import Sequential\n",
16
+ "from keras.layers import LSTM, Dense\n",
17
+ "from sklearn.model_selection import train_test_split\n",
18
+ "from sklearn.preprocessing import MinMaxScaler,StandardScaler\n",
19
+ "from keras.callbacks import ModelCheckpoint\n",
20
+ "import tensorflow as tf"
21
+ ]
22
+ },
23
+ {
24
+ "cell_type": "code",
25
+ "execution_count": 2,
26
+ "metadata": {},
27
+ "outputs": [],
28
+ "source": [
29
+ "merged = pd.read_csv(r'../data/long_merge.csv')"
30
+ ]
31
+ },
32
+ {
33
+ "cell_type": "code",
34
+ "execution_count": 10,
35
+ "metadata": {},
36
+ "outputs": [
37
+ {
38
+ "ename": "MemoryError",
39
+ "evalue": "Unable to allocate 8.15 GiB for an array with shape (528, 2072154) and data type float64",
40
+ "output_type": "error",
41
+ "traceback": [
42
+ "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
43
+ "\u001b[1;31mMemoryError\u001b[0m Traceback (most recent call last)",
44
+ "Cell \u001b[1;32mIn[10], line 23\u001b[0m\n\u001b[0;32m 14\u001b[0m \u001b[38;5;66;03m# for rtu in rtus:\u001b[39;00m\n\u001b[0;32m 15\u001b[0m \u001b[38;5;66;03m# for column in merged.columns:\u001b[39;00m\n\u001b[0;32m 16\u001b[0m \u001b[38;5;66;03m# if f\"rtu_00{rtu}_fltrd_sa\" in column:\u001b[39;00m\n\u001b[0;32m 17\u001b[0m \u001b[38;5;66;03m# cols.append(column)\u001b[39;00m\n\u001b[0;32m 18\u001b[0m cols \u001b[38;5;241m=\u001b[39m[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mdate\u001b[39m\u001b[38;5;124m'\u001b[39m] \u001b[38;5;241m+\u001b[39m cols \u001b[38;5;241m+\u001b[39m [\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mair_temp_set_1\u001b[39m\u001b[38;5;124m'\u001b[39m,\n\u001b[0;32m 19\u001b[0m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mair_temp_set_2\u001b[39m\u001b[38;5;124m'\u001b[39m,\n\u001b[0;32m 20\u001b[0m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mdew_point_temperature_set_1d\u001b[39m\u001b[38;5;124m'\u001b[39m,\n\u001b[0;32m 21\u001b[0m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mrelative_humidity_set_1\u001b[39m\u001b[38;5;124m'\u001b[39m,\n\u001b[0;32m 22\u001b[0m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124msolar_radiation_set_1\u001b[39m\u001b[38;5;124m'\u001b[39m]\n\u001b[1;32m---> 23\u001b[0m input_dataset \u001b[38;5;241m=\u001b[39m \u001b[43mmerged\u001b[49m\u001b[43m[\u001b[49m\u001b[43mcols\u001b[49m\u001b[43m]\u001b[49m\n\u001b[0;32m 24\u001b[0m input_dataset\u001b[38;5;241m.\u001b[39mcolumns\n",
45
+ "File \u001b[1;32md:\\Programs\\minconda3\\envs\\smartbuildings\\Lib\\site-packages\\pandas\\core\\frame.py:4105\u001b[0m, in \u001b[0;36mDataFrame.__getitem__\u001b[1;34m(self, key)\u001b[0m\n\u001b[0;32m 4102\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(indexer, \u001b[38;5;28mslice\u001b[39m):\n\u001b[0;32m 4103\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_slice(indexer, axis\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m1\u001b[39m)\n\u001b[1;32m-> 4105\u001b[0m data \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_take_with_is_copy\u001b[49m\u001b[43m(\u001b[49m\u001b[43mindexer\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43maxis\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m1\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[0;32m 4107\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m is_single_key:\n\u001b[0;32m 4108\u001b[0m \u001b[38;5;66;03m# What does looking for a single key in a non-unique index return?\u001b[39;00m\n\u001b[0;32m 4109\u001b[0m \u001b[38;5;66;03m# The behavior is inconsistent. It returns a Series, except when\u001b[39;00m\n\u001b[0;32m 4110\u001b[0m \u001b[38;5;66;03m# - the key itself is repeated (test on data.shape, #9519), or\u001b[39;00m\n\u001b[0;32m 4111\u001b[0m \u001b[38;5;66;03m# - we have a MultiIndex on columns (test on self.columns, #21309)\u001b[39;00m\n\u001b[0;32m 4112\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m data\u001b[38;5;241m.\u001b[39mshape[\u001b[38;5;241m1\u001b[39m] \u001b[38;5;241m==\u001b[39m \u001b[38;5;241m1\u001b[39m \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcolumns, MultiIndex):\n\u001b[0;32m 4113\u001b[0m \u001b[38;5;66;03m# GH#26490 using data[key] can cause RecursionError\u001b[39;00m\n",
46
+ "File \u001b[1;32md:\\Programs\\minconda3\\envs\\smartbuildings\\Lib\\site-packages\\pandas\\core\\generic.py:4150\u001b[0m, in \u001b[0;36mNDFrame._take_with_is_copy\u001b[1;34m(self, indices, axis)\u001b[0m\n\u001b[0;32m 4139\u001b[0m \u001b[38;5;129m@final\u001b[39m\n\u001b[0;32m 4140\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m_take_with_is_copy\u001b[39m(\u001b[38;5;28mself\u001b[39m, indices, axis: Axis \u001b[38;5;241m=\u001b[39m \u001b[38;5;241m0\u001b[39m) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m Self:\n\u001b[0;32m 4141\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[0;32m 4142\u001b[0m \u001b[38;5;124;03m Internal version of the `take` method that sets the `_is_copy`\u001b[39;00m\n\u001b[0;32m 4143\u001b[0m \u001b[38;5;124;03m attribute to keep track of the parent dataframe (using in indexing\u001b[39;00m\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 4148\u001b[0m \u001b[38;5;124;03m See the docstring of `take` for full explanation of the parameters.\u001b[39;00m\n\u001b[0;32m 4149\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[1;32m-> 4150\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mtake\u001b[49m\u001b[43m(\u001b[49m\u001b[43mindices\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mindices\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43maxis\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43maxis\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 4151\u001b[0m \u001b[38;5;66;03m# Maybe set copy if we didn't actually change the index.\u001b[39;00m\n\u001b[0;32m 4152\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mndim \u001b[38;5;241m==\u001b[39m \u001b[38;5;241m2\u001b[39m \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m 
result\u001b[38;5;241m.\u001b[39m_get_axis(axis)\u001b[38;5;241m.\u001b[39mequals(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_get_axis(axis)):\n",
47
+ "File \u001b[1;32md:\\Programs\\minconda3\\envs\\smartbuildings\\Lib\\site-packages\\pandas\\core\\generic.py:4130\u001b[0m, in \u001b[0;36mNDFrame.take\u001b[1;34m(self, indices, axis, **kwargs)\u001b[0m\n\u001b[0;32m 4125\u001b[0m \u001b[38;5;66;03m# We can get here with a slice via DataFrame.__getitem__\u001b[39;00m\n\u001b[0;32m 4126\u001b[0m indices \u001b[38;5;241m=\u001b[39m np\u001b[38;5;241m.\u001b[39marange(\n\u001b[0;32m 4127\u001b[0m indices\u001b[38;5;241m.\u001b[39mstart, indices\u001b[38;5;241m.\u001b[39mstop, indices\u001b[38;5;241m.\u001b[39mstep, dtype\u001b[38;5;241m=\u001b[39mnp\u001b[38;5;241m.\u001b[39mintp\n\u001b[0;32m 4128\u001b[0m )\n\u001b[1;32m-> 4130\u001b[0m new_data \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_mgr\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mtake\u001b[49m\u001b[43m(\u001b[49m\n\u001b[0;32m 4131\u001b[0m \u001b[43m \u001b[49m\u001b[43mindices\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 4132\u001b[0m \u001b[43m \u001b[49m\u001b[43maxis\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_get_block_manager_axis\u001b[49m\u001b[43m(\u001b[49m\u001b[43maxis\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 4133\u001b[0m \u001b[43m \u001b[49m\u001b[43mverify\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[0;32m 4134\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 4135\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_constructor_from_mgr(new_data, axes\u001b[38;5;241m=\u001b[39mnew_data\u001b[38;5;241m.\u001b[39maxes)\u001b[38;5;241m.\u001b[39m__finalize__(\n\u001b[0;32m 4136\u001b[0m \u001b[38;5;28mself\u001b[39m, 
method\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mtake\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 4137\u001b[0m )\n",
48
+ "File \u001b[1;32md:\\Programs\\minconda3\\envs\\smartbuildings\\Lib\\site-packages\\pandas\\core\\internals\\managers.py:894\u001b[0m, in \u001b[0;36mBaseBlockManager.take\u001b[1;34m(self, indexer, axis, verify)\u001b[0m\n\u001b[0;32m 891\u001b[0m indexer \u001b[38;5;241m=\u001b[39m maybe_convert_indices(indexer, n, verify\u001b[38;5;241m=\u001b[39mverify)\n\u001b[0;32m 893\u001b[0m new_labels \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39maxes[axis]\u001b[38;5;241m.\u001b[39mtake(indexer)\n\u001b[1;32m--> 894\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mreindex_indexer\u001b[49m\u001b[43m(\u001b[49m\n\u001b[0;32m 895\u001b[0m \u001b[43m \u001b[49m\u001b[43mnew_axis\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mnew_labels\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 896\u001b[0m \u001b[43m \u001b[49m\u001b[43mindexer\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mindexer\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 897\u001b[0m \u001b[43m \u001b[49m\u001b[43maxis\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43maxis\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 898\u001b[0m \u001b[43m \u001b[49m\u001b[43mallow_dups\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[0;32m 899\u001b[0m \u001b[43m \u001b[49m\u001b[43mcopy\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[0;32m 900\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n",
49
+ "File \u001b[1;32md:\\Programs\\minconda3\\envs\\smartbuildings\\Lib\\site-packages\\pandas\\core\\internals\\managers.py:680\u001b[0m, in \u001b[0;36mBaseBlockManager.reindex_indexer\u001b[1;34m(self, new_axis, indexer, axis, fill_value, allow_dups, copy, only_slice, use_na_proxy)\u001b[0m\n\u001b[0;32m 677\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mIndexError\u001b[39;00m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mRequested axis not found in manager\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m 679\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m axis \u001b[38;5;241m==\u001b[39m \u001b[38;5;241m0\u001b[39m:\n\u001b[1;32m--> 680\u001b[0m new_blocks \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_slice_take_blocks_ax0\u001b[49m\u001b[43m(\u001b[49m\n\u001b[0;32m 681\u001b[0m \u001b[43m \u001b[49m\u001b[43mindexer\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 682\u001b[0m \u001b[43m \u001b[49m\u001b[43mfill_value\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mfill_value\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 683\u001b[0m \u001b[43m \u001b[49m\u001b[43monly_slice\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43monly_slice\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 684\u001b[0m \u001b[43m \u001b[49m\u001b[43muse_na_proxy\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43muse_na_proxy\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 685\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 686\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m 687\u001b[0m new_blocks \u001b[38;5;241m=\u001b[39m [\n\u001b[0;32m 688\u001b[0m blk\u001b[38;5;241m.\u001b[39mtake_nd(\n\u001b[0;32m 689\u001b[0m indexer,\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 695\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m blk \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mblocks\n\u001b[0;32m 696\u001b[0m ]\n",
50
+ "File \u001b[1;32md:\\Programs\\minconda3\\envs\\smartbuildings\\Lib\\site-packages\\pandas\\core\\internals\\managers.py:843\u001b[0m, in \u001b[0;36mBaseBlockManager._slice_take_blocks_ax0\u001b[1;34m(self, slice_or_indexer, fill_value, only_slice, use_na_proxy, ref_inplace_op)\u001b[0m\n\u001b[0;32m 841\u001b[0m blocks\u001b[38;5;241m.\u001b[39mappend(nb)\n\u001b[0;32m 842\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m--> 843\u001b[0m nb \u001b[38;5;241m=\u001b[39m \u001b[43mblk\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mtake_nd\u001b[49m\u001b[43m(\u001b[49m\u001b[43mtaker\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43maxis\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m0\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mnew_mgr_locs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmgr_locs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 844\u001b[0m blocks\u001b[38;5;241m.\u001b[39mappend(nb)\n\u001b[0;32m 846\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m blocks\n",
51
+ "File \u001b[1;32md:\\Programs\\minconda3\\envs\\smartbuildings\\Lib\\site-packages\\pandas\\core\\internals\\blocks.py:1307\u001b[0m, in \u001b[0;36mBlock.take_nd\u001b[1;34m(self, indexer, axis, new_mgr_locs, fill_value)\u001b[0m\n\u001b[0;32m 1304\u001b[0m allow_fill \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mTrue\u001b[39;00m\n\u001b[0;32m 1306\u001b[0m \u001b[38;5;66;03m# Note: algos.take_nd has upcast logic similar to coerce_to_target_dtype\u001b[39;00m\n\u001b[1;32m-> 1307\u001b[0m new_values \u001b[38;5;241m=\u001b[39m \u001b[43malgos\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mtake_nd\u001b[49m\u001b[43m(\u001b[49m\n\u001b[0;32m 1308\u001b[0m \u001b[43m \u001b[49m\u001b[43mvalues\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mindexer\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43maxis\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43maxis\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mallow_fill\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mallow_fill\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mfill_value\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mfill_value\u001b[49m\n\u001b[0;32m 1309\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 1311\u001b[0m \u001b[38;5;66;03m# Called from three places in managers, all of which satisfy\u001b[39;00m\n\u001b[0;32m 1312\u001b[0m \u001b[38;5;66;03m# these assertions\u001b[39;00m\n\u001b[0;32m 1313\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(\u001b[38;5;28mself\u001b[39m, ExtensionBlock):\n\u001b[0;32m 1314\u001b[0m \u001b[38;5;66;03m# NB: in this case, the 'axis' kwarg will be ignored in the\u001b[39;00m\n\u001b[0;32m 1315\u001b[0m \u001b[38;5;66;03m# algos.take_nd call above.\u001b[39;00m\n",
52
+ "File \u001b[1;32md:\\Programs\\minconda3\\envs\\smartbuildings\\Lib\\site-packages\\pandas\\core\\array_algos\\take.py:117\u001b[0m, in \u001b[0;36mtake_nd\u001b[1;34m(arr, indexer, axis, fill_value, allow_fill)\u001b[0m\n\u001b[0;32m 114\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m arr\u001b[38;5;241m.\u001b[39mtake(indexer, fill_value\u001b[38;5;241m=\u001b[39mfill_value, allow_fill\u001b[38;5;241m=\u001b[39mallow_fill)\n\u001b[0;32m 116\u001b[0m arr \u001b[38;5;241m=\u001b[39m np\u001b[38;5;241m.\u001b[39masarray(arr)\n\u001b[1;32m--> 117\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43m_take_nd_ndarray\u001b[49m\u001b[43m(\u001b[49m\u001b[43marr\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mindexer\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43maxis\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mfill_value\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mallow_fill\u001b[49m\u001b[43m)\u001b[49m\n",
53
+ "File \u001b[1;32md:\\Programs\\minconda3\\envs\\smartbuildings\\Lib\\site-packages\\pandas\\core\\array_algos\\take.py:157\u001b[0m, in \u001b[0;36m_take_nd_ndarray\u001b[1;34m(arr, indexer, axis, fill_value, allow_fill)\u001b[0m\n\u001b[0;32m 155\u001b[0m out \u001b[38;5;241m=\u001b[39m np\u001b[38;5;241m.\u001b[39mempty(out_shape, dtype\u001b[38;5;241m=\u001b[39mdtype, order\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mF\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m 156\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m--> 157\u001b[0m out \u001b[38;5;241m=\u001b[39m np\u001b[38;5;241m.\u001b[39mempty(out_shape, dtype\u001b[38;5;241m=\u001b[39mdtype)\n\u001b[0;32m 159\u001b[0m func \u001b[38;5;241m=\u001b[39m _get_take_nd_function(\n\u001b[0;32m 160\u001b[0m arr\u001b[38;5;241m.\u001b[39mndim, arr\u001b[38;5;241m.\u001b[39mdtype, out\u001b[38;5;241m.\u001b[39mdtype, axis\u001b[38;5;241m=\u001b[39maxis, mask_info\u001b[38;5;241m=\u001b[39mmask_info\n\u001b[0;32m 161\u001b[0m )\n\u001b[0;32m 162\u001b[0m func(arr, indexer, out, fill_value)\n",
54
+ "\u001b[1;31mMemoryError\u001b[0m: Unable to allocate 8.15 GiB for an array with shape (528, 2072154) and data type float64"
55
+ ]
56
+ }
57
+ ],
58
+ "source": [
59
+ "zones = [69, 68, 67, 66, 65, 64, 42, 41, 40, 39, 38, 37, 36]  # 65 and 64 are separate zones; a `65.64` float would match no column name\n",
60
+ "rtus = [1]\n",
61
+ "cols = []\n",
62
+ "# Pass 1: per-zone columns, leaving out CO2, hot-water valve and setpoint columns (setpoints are added in pass 2).\n",
63
+ "for zone in zones:\n",
64
+ "    for column in merged.columns:\n",
65
+ "        if f\"zone_0{zone}\" in column and 'co2' not in column and \"hw_valve\" not in column and \"cooling_sp\" not in column and \"heating_sp\" not in column:\n",
66
+ "            cols.append(column)\n",
67
+ "# Pass 2: per-zone setpoints. Parentheses are required: `A and B or C` parses as `(A and B) or C`, which appends every heating_sp column once per zone.\n",
68
+ "for zone in zones:\n",
69
+ "    for column in merged.columns:\n",
70
+ "        if f\"zone_0{zone}\" in column and (\"cooling_sp\" in column or \"heating_sp\" in column):\n",
71
+ "            cols.append(column)\n",
72
+ "# for rtu in rtus:\n",
73
+ "#     for column in merged.columns:\n",
74
+ "#         if f\"rtu_00{rtu}_fltrd_sa\" in column:\n",
75
+ "#             cols.append(column)\n",
76
+ "cols =['date'] + cols + ['air_temp_set_1',\n",
77
+ " 'air_temp_set_2',\n",
78
+ " 'dew_point_temperature_set_1d',\n",
79
+ " 'relative_humidity_set_1',\n",
80
+ " 'solar_radiation_set_1']\n",
81
+ "input_dataset = merged[cols]"
82
+ ]
83
+ },
84
+ {
85
+ "cell_type": "code",
86
+ "execution_count": 11,
87
+ "metadata": {},
88
+ "outputs": [
89
+ {
90
+ "name": "stderr",
91
+ "output_type": "stream",
92
+ "text": [
93
+ "C:\\Users\\arbal\\AppData\\Local\\Temp\\ipykernel_32464\\216607548.py:1: SettingWithCopyWarning: \n",
94
+ "A value is trying to be set on a copy of a slice from a DataFrame.\n",
95
+ "Try using .loc[row_indexer,col_indexer] = value instead\n",
96
+ "\n",
97
+ "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
98
+ " input_dataset['date'] = pd.to_datetime(input_dataset['date'], format = \"%Y-%m-%d %H:%M:%S\")\n"
99
+ ]
100
+ },
101
+ {
102
+ "ename": "MemoryError",
103
+ "evalue": "Unable to allocate 8.15 GiB for an array with shape (528, 2070713) and data type float64",
104
+ "output_type": "error",
105
+ "traceback": [
106
+ "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
107
+ "\u001b[1;31mMemoryError\u001b[0m Traceback (most recent call last)",
108
+ "Cell \u001b[1;32mIn[11], line 2\u001b[0m\n\u001b[0;32m 1\u001b[0m input_dataset[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mdate\u001b[39m\u001b[38;5;124m'\u001b[39m] \u001b[38;5;241m=\u001b[39m pd\u001b[38;5;241m.\u001b[39mto_datetime(input_dataset[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mdate\u001b[39m\u001b[38;5;124m'\u001b[39m], \u001b[38;5;28mformat\u001b[39m \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m%\u001b[39m\u001b[38;5;124mY-\u001b[39m\u001b[38;5;124m%\u001b[39m\u001b[38;5;124mm-\u001b[39m\u001b[38;5;132;01m%d\u001b[39;00m\u001b[38;5;124m \u001b[39m\u001b[38;5;124m%\u001b[39m\u001b[38;5;124mH:\u001b[39m\u001b[38;5;124m%\u001b[39m\u001b[38;5;124mM:\u001b[39m\u001b[38;5;124m%\u001b[39m\u001b[38;5;124mS\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m----> 2\u001b[0m df_filtered \u001b[38;5;241m=\u001b[39m \u001b[43minput_dataset\u001b[49m\u001b[43m[\u001b[49m\u001b[43m \u001b[49m\u001b[43m(\u001b[49m\u001b[43minput_dataset\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdate\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdt\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdate\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m>\u001b[39;49m\u001b[43mdate\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m2018\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m1\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m1\u001b[39;49m\u001b[43m)\u001b[49m\u001b[43m)\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m&\u001b[39;49m\u001b[43m \u001b[49m\u001b[43m(\u001b[49m\u001b[43minput_dataset\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdate\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdt\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdate\u001b[49m\u001b[38;5;241;43m<\u001b[39;49m\u001b[43m \u001b[49m\u001b[43mdate\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m2021\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m 
\u001b[49m\u001b[38;5;241;43m1\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m1\u001b[39;49m\u001b[43m)\u001b[49m\u001b[43m)\u001b[49m\u001b[43m]\u001b[49m\n\u001b[0;32m 4\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m df_filtered\u001b[38;5;241m.\u001b[39misna()\u001b[38;5;241m.\u001b[39many()\u001b[38;5;241m.\u001b[39many():\n\u001b[0;32m 5\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mThere are NA values in the DataFrame columns.\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n",
109
+ "File \u001b[1;32md:\\Programs\\minconda3\\envs\\smartbuildings\\Lib\\site-packages\\pandas\\core\\frame.py:4081\u001b[0m, in \u001b[0;36mDataFrame.__getitem__\u001b[1;34m(self, key)\u001b[0m\n\u001b[0;32m 4079\u001b[0m \u001b[38;5;66;03m# Do we have a (boolean) 1d indexer?\u001b[39;00m\n\u001b[0;32m 4080\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m com\u001b[38;5;241m.\u001b[39mis_bool_indexer(key):\n\u001b[1;32m-> 4081\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_getitem_bool_array\u001b[49m\u001b[43m(\u001b[49m\u001b[43mkey\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 4083\u001b[0m \u001b[38;5;66;03m# We are left with two options: a single key, and a collection of keys,\u001b[39;00m\n\u001b[0;32m 4084\u001b[0m \u001b[38;5;66;03m# We interpret tuples as collections only for non-MultiIndex\u001b[39;00m\n\u001b[0;32m 4085\u001b[0m is_single_key \u001b[38;5;241m=\u001b[39m \u001b[38;5;28misinstance\u001b[39m(key, \u001b[38;5;28mtuple\u001b[39m) \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m is_list_like(key)\n",
110
+ "File \u001b[1;32md:\\Programs\\minconda3\\envs\\smartbuildings\\Lib\\site-packages\\pandas\\core\\frame.py:4143\u001b[0m, in \u001b[0;36mDataFrame._getitem_bool_array\u001b[1;34m(self, key)\u001b[0m\n\u001b[0;32m 4140\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcopy(deep\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m)\n\u001b[0;32m 4142\u001b[0m indexer \u001b[38;5;241m=\u001b[39m key\u001b[38;5;241m.\u001b[39mnonzero()[\u001b[38;5;241m0\u001b[39m]\n\u001b[1;32m-> 4143\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_take_with_is_copy\u001b[49m\u001b[43m(\u001b[49m\u001b[43mindexer\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43maxis\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m0\u001b[39;49m\u001b[43m)\u001b[49m\n",
111
+ "File \u001b[1;32md:\\Programs\\minconda3\\envs\\smartbuildings\\Lib\\site-packages\\pandas\\core\\generic.py:4150\u001b[0m, in \u001b[0;36mNDFrame._take_with_is_copy\u001b[1;34m(self, indices, axis)\u001b[0m\n\u001b[0;32m 4139\u001b[0m \u001b[38;5;129m@final\u001b[39m\n\u001b[0;32m 4140\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m_take_with_is_copy\u001b[39m(\u001b[38;5;28mself\u001b[39m, indices, axis: Axis \u001b[38;5;241m=\u001b[39m \u001b[38;5;241m0\u001b[39m) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m Self:\n\u001b[0;32m 4141\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[0;32m 4142\u001b[0m \u001b[38;5;124;03m Internal version of the `take` method that sets the `_is_copy`\u001b[39;00m\n\u001b[0;32m 4143\u001b[0m \u001b[38;5;124;03m attribute to keep track of the parent dataframe (using in indexing\u001b[39;00m\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 4148\u001b[0m \u001b[38;5;124;03m See the docstring of `take` for full explanation of the parameters.\u001b[39;00m\n\u001b[0;32m 4149\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[1;32m-> 4150\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mtake\u001b[49m\u001b[43m(\u001b[49m\u001b[43mindices\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mindices\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43maxis\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43maxis\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 4151\u001b[0m \u001b[38;5;66;03m# Maybe set copy if we didn't actually change the index.\u001b[39;00m\n\u001b[0;32m 4152\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mndim \u001b[38;5;241m==\u001b[39m \u001b[38;5;241m2\u001b[39m \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m 
result\u001b[38;5;241m.\u001b[39m_get_axis(axis)\u001b[38;5;241m.\u001b[39mequals(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_get_axis(axis)):\n",
112
+ "File \u001b[1;32md:\\Programs\\minconda3\\envs\\smartbuildings\\Lib\\site-packages\\pandas\\core\\generic.py:4130\u001b[0m, in \u001b[0;36mNDFrame.take\u001b[1;34m(self, indices, axis, **kwargs)\u001b[0m\n\u001b[0;32m 4125\u001b[0m \u001b[38;5;66;03m# We can get here with a slice via DataFrame.__getitem__\u001b[39;00m\n\u001b[0;32m 4126\u001b[0m indices \u001b[38;5;241m=\u001b[39m np\u001b[38;5;241m.\u001b[39marange(\n\u001b[0;32m 4127\u001b[0m indices\u001b[38;5;241m.\u001b[39mstart, indices\u001b[38;5;241m.\u001b[39mstop, indices\u001b[38;5;241m.\u001b[39mstep, dtype\u001b[38;5;241m=\u001b[39mnp\u001b[38;5;241m.\u001b[39mintp\n\u001b[0;32m 4128\u001b[0m )\n\u001b[1;32m-> 4130\u001b[0m new_data \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_mgr\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mtake\u001b[49m\u001b[43m(\u001b[49m\n\u001b[0;32m 4131\u001b[0m \u001b[43m \u001b[49m\u001b[43mindices\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 4132\u001b[0m \u001b[43m \u001b[49m\u001b[43maxis\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_get_block_manager_axis\u001b[49m\u001b[43m(\u001b[49m\u001b[43maxis\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 4133\u001b[0m \u001b[43m \u001b[49m\u001b[43mverify\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[0;32m 4134\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 4135\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_constructor_from_mgr(new_data, axes\u001b[38;5;241m=\u001b[39mnew_data\u001b[38;5;241m.\u001b[39maxes)\u001b[38;5;241m.\u001b[39m__finalize__(\n\u001b[0;32m 4136\u001b[0m \u001b[38;5;28mself\u001b[39m, 
method\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mtake\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 4137\u001b[0m )\n",
113
+ "File \u001b[1;32md:\\Programs\\minconda3\\envs\\smartbuildings\\Lib\\site-packages\\pandas\\core\\internals\\managers.py:894\u001b[0m, in \u001b[0;36mBaseBlockManager.take\u001b[1;34m(self, indexer, axis, verify)\u001b[0m\n\u001b[0;32m 891\u001b[0m indexer \u001b[38;5;241m=\u001b[39m maybe_convert_indices(indexer, n, verify\u001b[38;5;241m=\u001b[39mverify)\n\u001b[0;32m 893\u001b[0m new_labels \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39maxes[axis]\u001b[38;5;241m.\u001b[39mtake(indexer)\n\u001b[1;32m--> 894\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mreindex_indexer\u001b[49m\u001b[43m(\u001b[49m\n\u001b[0;32m 895\u001b[0m \u001b[43m \u001b[49m\u001b[43mnew_axis\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mnew_labels\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 896\u001b[0m \u001b[43m \u001b[49m\u001b[43mindexer\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mindexer\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 897\u001b[0m \u001b[43m \u001b[49m\u001b[43maxis\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43maxis\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 898\u001b[0m \u001b[43m \u001b[49m\u001b[43mallow_dups\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[0;32m 899\u001b[0m \u001b[43m \u001b[49m\u001b[43mcopy\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[0;32m 900\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n",
114
+ "File \u001b[1;32md:\\Programs\\minconda3\\envs\\smartbuildings\\Lib\\site-packages\\pandas\\core\\internals\\managers.py:687\u001b[0m, in \u001b[0;36mBaseBlockManager.reindex_indexer\u001b[1;34m(self, new_axis, indexer, axis, fill_value, allow_dups, copy, only_slice, use_na_proxy)\u001b[0m\n\u001b[0;32m 680\u001b[0m new_blocks \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_slice_take_blocks_ax0(\n\u001b[0;32m 681\u001b[0m indexer,\n\u001b[0;32m 682\u001b[0m fill_value\u001b[38;5;241m=\u001b[39mfill_value,\n\u001b[0;32m 683\u001b[0m only_slice\u001b[38;5;241m=\u001b[39monly_slice,\n\u001b[0;32m 684\u001b[0m use_na_proxy\u001b[38;5;241m=\u001b[39muse_na_proxy,\n\u001b[0;32m 685\u001b[0m )\n\u001b[0;32m 686\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m--> 687\u001b[0m new_blocks \u001b[38;5;241m=\u001b[39m \u001b[43m[\u001b[49m\n\u001b[0;32m 688\u001b[0m \u001b[43m \u001b[49m\u001b[43mblk\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mtake_nd\u001b[49m\u001b[43m(\u001b[49m\n\u001b[0;32m 689\u001b[0m \u001b[43m \u001b[49m\u001b[43mindexer\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 690\u001b[0m \u001b[43m \u001b[49m\u001b[43maxis\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m1\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[0;32m 691\u001b[0m \u001b[43m \u001b[49m\u001b[43mfill_value\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m(\u001b[49m\n\u001b[0;32m 692\u001b[0m \u001b[43m \u001b[49m\u001b[43mfill_value\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mif\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mfill_value\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;129;43;01mis\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;129;43;01mnot\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;28;43;01melse\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mblk\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfill_value\u001b[49m\n\u001b[0;32m 
693\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 694\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 695\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;28;43;01mfor\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mblk\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;129;43;01min\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mblocks\u001b[49m\n\u001b[0;32m 696\u001b[0m \u001b[43m \u001b[49m\u001b[43m]\u001b[49m\n\u001b[0;32m 698\u001b[0m new_axes \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mlist\u001b[39m(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39maxes)\n\u001b[0;32m 699\u001b[0m new_axes[axis] \u001b[38;5;241m=\u001b[39m new_axis\n",
115
+ "File \u001b[1;32md:\\Programs\\minconda3\\envs\\smartbuildings\\Lib\\site-packages\\pandas\\core\\internals\\managers.py:688\u001b[0m, in \u001b[0;36m<listcomp>\u001b[1;34m(.0)\u001b[0m\n\u001b[0;32m 680\u001b[0m new_blocks \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_slice_take_blocks_ax0(\n\u001b[0;32m 681\u001b[0m indexer,\n\u001b[0;32m 682\u001b[0m fill_value\u001b[38;5;241m=\u001b[39mfill_value,\n\u001b[0;32m 683\u001b[0m only_slice\u001b[38;5;241m=\u001b[39monly_slice,\n\u001b[0;32m 684\u001b[0m use_na_proxy\u001b[38;5;241m=\u001b[39muse_na_proxy,\n\u001b[0;32m 685\u001b[0m )\n\u001b[0;32m 686\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m 687\u001b[0m new_blocks \u001b[38;5;241m=\u001b[39m [\n\u001b[1;32m--> 688\u001b[0m \u001b[43mblk\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mtake_nd\u001b[49m\u001b[43m(\u001b[49m\n\u001b[0;32m 689\u001b[0m \u001b[43m \u001b[49m\u001b[43mindexer\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 690\u001b[0m \u001b[43m \u001b[49m\u001b[43maxis\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m1\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[0;32m 691\u001b[0m \u001b[43m \u001b[49m\u001b[43mfill_value\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m(\u001b[49m\n\u001b[0;32m 692\u001b[0m \u001b[43m \u001b[49m\u001b[43mfill_value\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mif\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mfill_value\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;129;43;01mis\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;129;43;01mnot\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;28;43;01melse\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mblk\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfill_value\u001b[49m\n\u001b[0;32m 693\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 694\u001b[0m \u001b[43m 
\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 695\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m blk \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mblocks\n\u001b[0;32m 696\u001b[0m ]\n\u001b[0;32m 698\u001b[0m new_axes \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mlist\u001b[39m(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39maxes)\n\u001b[0;32m 699\u001b[0m new_axes[axis] \u001b[38;5;241m=\u001b[39m new_axis\n",
116
+ "File \u001b[1;32md:\\Programs\\minconda3\\envs\\smartbuildings\\Lib\\site-packages\\pandas\\core\\internals\\blocks.py:1307\u001b[0m, in \u001b[0;36mBlock.take_nd\u001b[1;34m(self, indexer, axis, new_mgr_locs, fill_value)\u001b[0m\n\u001b[0;32m 1304\u001b[0m allow_fill \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mTrue\u001b[39;00m\n\u001b[0;32m 1306\u001b[0m \u001b[38;5;66;03m# Note: algos.take_nd has upcast logic similar to coerce_to_target_dtype\u001b[39;00m\n\u001b[1;32m-> 1307\u001b[0m new_values \u001b[38;5;241m=\u001b[39m \u001b[43malgos\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mtake_nd\u001b[49m\u001b[43m(\u001b[49m\n\u001b[0;32m 1308\u001b[0m \u001b[43m \u001b[49m\u001b[43mvalues\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mindexer\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43maxis\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43maxis\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mallow_fill\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mallow_fill\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mfill_value\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mfill_value\u001b[49m\n\u001b[0;32m 1309\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 1311\u001b[0m \u001b[38;5;66;03m# Called from three places in managers, all of which satisfy\u001b[39;00m\n\u001b[0;32m 1312\u001b[0m \u001b[38;5;66;03m# these assertions\u001b[39;00m\n\u001b[0;32m 1313\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(\u001b[38;5;28mself\u001b[39m, ExtensionBlock):\n\u001b[0;32m 1314\u001b[0m \u001b[38;5;66;03m# NB: in this case, the 'axis' kwarg will be ignored in the\u001b[39;00m\n\u001b[0;32m 1315\u001b[0m \u001b[38;5;66;03m# algos.take_nd call above.\u001b[39;00m\n",
117
+ "File \u001b[1;32md:\\Programs\\minconda3\\envs\\smartbuildings\\Lib\\site-packages\\pandas\\core\\array_algos\\take.py:117\u001b[0m, in \u001b[0;36mtake_nd\u001b[1;34m(arr, indexer, axis, fill_value, allow_fill)\u001b[0m\n\u001b[0;32m 114\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m arr\u001b[38;5;241m.\u001b[39mtake(indexer, fill_value\u001b[38;5;241m=\u001b[39mfill_value, allow_fill\u001b[38;5;241m=\u001b[39mallow_fill)\n\u001b[0;32m 116\u001b[0m arr \u001b[38;5;241m=\u001b[39m np\u001b[38;5;241m.\u001b[39masarray(arr)\n\u001b[1;32m--> 117\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43m_take_nd_ndarray\u001b[49m\u001b[43m(\u001b[49m\u001b[43marr\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mindexer\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43maxis\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mfill_value\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mallow_fill\u001b[49m\u001b[43m)\u001b[49m\n",
118
+ "File \u001b[1;32md:\\Programs\\minconda3\\envs\\smartbuildings\\Lib\\site-packages\\pandas\\core\\array_algos\\take.py:157\u001b[0m, in \u001b[0;36m_take_nd_ndarray\u001b[1;34m(arr, indexer, axis, fill_value, allow_fill)\u001b[0m\n\u001b[0;32m 155\u001b[0m out \u001b[38;5;241m=\u001b[39m np\u001b[38;5;241m.\u001b[39mempty(out_shape, dtype\u001b[38;5;241m=\u001b[39mdtype, order\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mF\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m 156\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m--> 157\u001b[0m out \u001b[38;5;241m=\u001b[39m np\u001b[38;5;241m.\u001b[39mempty(out_shape, dtype\u001b[38;5;241m=\u001b[39mdtype)\n\u001b[0;32m 159\u001b[0m func \u001b[38;5;241m=\u001b[39m _get_take_nd_function(\n\u001b[0;32m 160\u001b[0m arr\u001b[38;5;241m.\u001b[39mndim, arr\u001b[38;5;241m.\u001b[39mdtype, out\u001b[38;5;241m.\u001b[39mdtype, axis\u001b[38;5;241m=\u001b[39maxis, mask_info\u001b[38;5;241m=\u001b[39mmask_info\n\u001b[0;32m 161\u001b[0m )\n\u001b[0;32m 162\u001b[0m func(arr, indexer, out, fill_value)\n",
119
+ "\u001b[1;31mMemoryError\u001b[0m: Unable to allocate 8.15 GiB for an array with shape (528, 2070713) and data type float64"
120
+ ]
121
+ }
122
+ ],
123
+ "source": [
124
+ "# Parse timestamps once; the explicit format avoids per-row format inference.\n",
125
+ "input_dataset['date'] = pd.to_datetime(input_dataset['date'], format = \"%Y-%m-%d %H:%M:%S\")\n",
126
+ "\n",
127
+ "# Keep 2019-01-02 00:00:00 (inclusive) up to 2021-01-01 00:00:00 (exclusive). These are the same rows\n",
128
+ "# the former `.dt.date > date(2019, 1, 1)` / `.dt.date < date(2021, 1, 1)` test selected, but the\n",
129
+ "# comparison now runs on datetime64 values instead of building a Python `date` object per row, twice.\n",
130
+ "in_range = (input_dataset['date'] >= pd.Timestamp(2019, 1, 2)) & (input_dataset['date'] < pd.Timestamp(2021, 1, 1))\n",
131
+ "df_filtered = input_dataset[in_range]\n",
132
+ "\n",
133
+ "# NOTE(review): the saved output of this cell is a MemoryError (8.15 GiB for a (528, 2070713) float64 block).\n",
134
+ "# The row selection above still copies every column; narrowing input_dataset to the columns the model\n",
135
+ "# uses before filtering would likely be needed to fit in memory - TODO confirm which columns are required.\n",
136
+ "if df_filtered.isna().any().any():\n",
137
+ "    print(\"There are NA values in the DataFrame columns.\")"
129
+ ]
130
+ },
131
+ {
132
+ "cell_type": "code",
133
+ "execution_count": 7,
134
+ "metadata": {},
135
+ "outputs": [
136
+ {
137
+ "data": {
138
+ "text/html": [
139
+ "<div>\n",
140
+ "<style scoped>\n",
141
+ " .dataframe tbody tr th:only-of-type {\n",
142
+ " vertical-align: middle;\n",
143
+ " }\n",
144
+ "\n",
145
+ " .dataframe tbody tr th {\n",
146
+ " vertical-align: top;\n",
147
+ " }\n",
148
+ "\n",
149
+ " .dataframe thead th {\n",
150
+ " text-align: right;\n",
151
+ " }\n",
152
+ "</style>\n",
153
+ "<table border=\"1\" class=\"dataframe\">\n",
154
+ " <thead>\n",
155
+ " <tr style=\"text-align: right;\">\n",
156
+ " <th></th>\n",
157
+ " <th>date</th>\n",
158
+ " <th>zone_069_temp</th>\n",
159
+ " <th>zone_069_fan_spd</th>\n",
160
+ " <th>zone_068_temp</th>\n",
161
+ " <th>zone_068_fan_spd</th>\n",
162
+ " <th>zone_067_temp</th>\n",
163
+ " <th>zone_067_fan_spd</th>\n",
164
+ " <th>zone_066_temp</th>\n",
165
+ " <th>zone_066_fan_spd</th>\n",
166
+ " <th>zone_042_temp</th>\n",
167
+ " <th>...</th>\n",
168
+ " <th>zone_066_heating_sp</th>\n",
169
+ " <th>zone_067_heating_sp</th>\n",
170
+ " <th>zone_069_heating_sp</th>\n",
171
+ " <th>zone_070_heating_sp</th>\n",
172
+ " <th>zone_071_heating_sp</th>\n",
173
+ " <th>air_temp_set_1</th>\n",
174
+ " <th>air_temp_set_2</th>\n",
175
+ " <th>dew_point_temperature_set_1d</th>\n",
176
+ " <th>relative_humidity_set_1</th>\n",
177
+ " <th>solar_radiation_set_1</th>\n",
178
+ " </tr>\n",
179
+ " </thead>\n",
180
+ " <tbody>\n",
181
+ " <tr>\n",
182
+ " <th>1440</th>\n",
183
+ " <td>2018-01-02 00:00:00</td>\n",
184
+ " <td>71.4</td>\n",
185
+ " <td>20.0</td>\n",
186
+ " <td>73.2</td>\n",
187
+ " <td>70.0</td>\n",
188
+ " <td>71.2</td>\n",
189
+ " <td>20.0</td>\n",
190
+ " <td>70.4</td>\n",
191
+ " <td>35.0</td>\n",
192
+ " <td>71.6</td>\n",
193
+ " <td>...</td>\n",
194
+ " <td>NaN</td>\n",
195
+ " <td>NaN</td>\n",
196
+ " <td>NaN</td>\n",
197
+ " <td>NaN</td>\n",
198
+ " <td>NaN</td>\n",
199
+ " <td>15.280</td>\n",
200
+ " <td>15.100</td>\n",
201
+ " <td>6.33</td>\n",
202
+ " <td>55.40</td>\n",
203
+ " <td>161.9</td>\n",
204
+ " </tr>\n",
205
+ " <tr>\n",
206
+ " <th>1441</th>\n",
207
+ " <td>2018-01-02 00:01:00</td>\n",
208
+ " <td>71.4</td>\n",
209
+ " <td>20.0</td>\n",
210
+ " <td>73.2</td>\n",
211
+ " <td>70.0</td>\n",
212
+ " <td>71.2</td>\n",
213
+ " <td>20.0</td>\n",
214
+ " <td>70.4</td>\n",
215
+ " <td>35.0</td>\n",
216
+ " <td>71.6</td>\n",
217
+ " <td>...</td>\n",
218
+ " <td>NaN</td>\n",
219
+ " <td>NaN</td>\n",
220
+ " <td>NaN</td>\n",
221
+ " <td>NaN</td>\n",
222
+ " <td>NaN</td>\n",
223
+ " <td>15.280</td>\n",
224
+ " <td>15.100</td>\n",
225
+ " <td>6.33</td>\n",
226
+ " <td>55.40</td>\n",
227
+ " <td>161.9</td>\n",
228
+ " </tr>\n",
229
+ " <tr>\n",
230
+ " <th>1442</th>\n",
231
+ " <td>2018-01-02 00:02:00</td>\n",
232
+ " <td>71.4</td>\n",
233
+ " <td>20.0</td>\n",
234
+ " <td>73.2</td>\n",
235
+ " <td>70.0</td>\n",
236
+ " <td>71.2</td>\n",
237
+ " <td>20.0</td>\n",
238
+ " <td>70.4</td>\n",
239
+ " <td>35.0</td>\n",
240
+ " <td>71.6</td>\n",
241
+ " <td>...</td>\n",
242
+ " <td>NaN</td>\n",
243
+ " <td>NaN</td>\n",
244
+ " <td>NaN</td>\n",
245
+ " <td>NaN</td>\n",
246
+ " <td>NaN</td>\n",
247
+ " <td>15.280</td>\n",
248
+ " <td>15.100</td>\n",
249
+ " <td>6.33</td>\n",
250
+ " <td>55.40</td>\n",
251
+ " <td>161.9</td>\n",
252
+ " </tr>\n",
253
+ " <tr>\n",
254
+ " <th>1443</th>\n",
255
+ " <td>2018-01-02 00:03:00</td>\n",
256
+ " <td>71.4</td>\n",
257
+ " <td>20.0</td>\n",
258
+ " <td>73.2</td>\n",
259
+ " <td>70.0</td>\n",
260
+ " <td>71.2</td>\n",
261
+ " <td>20.0</td>\n",
262
+ " <td>70.4</td>\n",
263
+ " <td>35.0</td>\n",
264
+ " <td>71.6</td>\n",
265
+ " <td>...</td>\n",
266
+ " <td>NaN</td>\n",
267
+ " <td>NaN</td>\n",
268
+ " <td>NaN</td>\n",
269
+ " <td>NaN</td>\n",
270
+ " <td>NaN</td>\n",
271
+ " <td>15.280</td>\n",
272
+ " <td>15.100</td>\n",
273
+ " <td>6.33</td>\n",
274
+ " <td>55.40</td>\n",
275
+ " <td>161.9</td>\n",
276
+ " </tr>\n",
277
+ " <tr>\n",
278
+ " <th>1444</th>\n",
279
+ " <td>2018-01-02 00:04:00</td>\n",
280
+ " <td>71.4</td>\n",
281
+ " <td>20.0</td>\n",
282
+ " <td>73.2</td>\n",
283
+ " <td>70.0</td>\n",
284
+ " <td>71.2</td>\n",
285
+ " <td>20.0</td>\n",
286
+ " <td>70.4</td>\n",
287
+ " <td>35.0</td>\n",
288
+ " <td>71.6</td>\n",
289
+ " <td>...</td>\n",
290
+ " <td>NaN</td>\n",
291
+ " <td>NaN</td>\n",
292
+ " <td>NaN</td>\n",
293
+ " <td>NaN</td>\n",
294
+ " <td>NaN</td>\n",
295
+ " <td>15.280</td>\n",
296
+ " <td>15.100</td>\n",
297
+ " <td>6.33</td>\n",
298
+ " <td>55.40</td>\n",
299
+ " <td>161.9</td>\n",
300
+ " </tr>\n",
301
+ " <tr>\n",
302
+ " <th>...</th>\n",
303
+ " <td>...</td>\n",
304
+ " <td>...</td>\n",
305
+ " <td>...</td>\n",
306
+ " <td>...</td>\n",
307
+ " <td>...</td>\n",
308
+ " <td>...</td>\n",
309
+ " <td>...</td>\n",
310
+ " <td>...</td>\n",
311
+ " <td>...</td>\n",
312
+ " <td>...</td>\n",
313
+ " <td>...</td>\n",
314
+ " <td>...</td>\n",
315
+ " <td>...</td>\n",
316
+ " <td>...</td>\n",
317
+ " <td>...</td>\n",
318
+ " <td>...</td>\n",
319
+ " <td>...</td>\n",
320
+ " <td>...</td>\n",
321
+ " <td>...</td>\n",
322
+ " <td>...</td>\n",
323
+ " <td>...</td>\n",
324
+ " </tr>\n",
325
+ " <tr>\n",
326
+ " <th>2072148</th>\n",
327
+ " <td>2020-12-31 23:57:00</td>\n",
328
+ " <td>68.8</td>\n",
329
+ " <td>20.0</td>\n",
330
+ " <td>71.7</td>\n",
331
+ " <td>20.0</td>\n",
332
+ " <td>70.4</td>\n",
333
+ " <td>20.0</td>\n",
334
+ " <td>68.6</td>\n",
335
+ " <td>35.0</td>\n",
336
+ " <td>71.4</td>\n",
337
+ " <td>...</td>\n",
338
+ " <td>68.0</td>\n",
339
+ " <td>68.0</td>\n",
340
+ " <td>68.0</td>\n",
341
+ " <td>65.0</td>\n",
342
+ " <td>67.0</td>\n",
343
+ " <td>13.994</td>\n",
344
+ " <td>13.528</td>\n",
345
+ " <td>4.11</td>\n",
346
+ " <td>51.61</td>\n",
347
+ " <td>188.8</td>\n",
348
+ " </tr>\n",
349
+ " <tr>\n",
350
+ " <th>2072149</th>\n",
351
+ " <td>2020-12-31 23:58:00</td>\n",
352
+ " <td>68.8</td>\n",
353
+ " <td>20.0</td>\n",
354
+ " <td>71.7</td>\n",
355
+ " <td>20.0</td>\n",
356
+ " <td>70.4</td>\n",
357
+ " <td>20.0</td>\n",
358
+ " <td>68.6</td>\n",
359
+ " <td>35.0</td>\n",
360
+ " <td>71.4</td>\n",
361
+ " <td>...</td>\n",
362
+ " <td>68.0</td>\n",
363
+ " <td>68.0</td>\n",
364
+ " <td>68.0</td>\n",
365
+ " <td>65.0</td>\n",
366
+ " <td>67.0</td>\n",
367
+ " <td>13.994</td>\n",
368
+ " <td>13.528</td>\n",
369
+ " <td>4.11</td>\n",
370
+ " <td>51.61</td>\n",
371
+ " <td>188.8</td>\n",
372
+ " </tr>\n",
373
+ " <tr>\n",
374
+ " <th>2072150</th>\n",
375
+ " <td>2020-12-31 23:58:00</td>\n",
376
+ " <td>68.8</td>\n",
377
+ " <td>20.0</td>\n",
378
+ " <td>71.7</td>\n",
379
+ " <td>20.0</td>\n",
380
+ " <td>70.4</td>\n",
381
+ " <td>20.0</td>\n",
382
+ " <td>68.6</td>\n",
383
+ " <td>35.0</td>\n",
384
+ " <td>71.4</td>\n",
385
+ " <td>...</td>\n",
386
+ " <td>68.0</td>\n",
387
+ " <td>68.0</td>\n",
388
+ " <td>68.0</td>\n",
389
+ " <td>65.0</td>\n",
390
+ " <td>67.0</td>\n",
391
+ " <td>13.994</td>\n",
392
+ " <td>13.528</td>\n",
393
+ " <td>4.11</td>\n",
394
+ " <td>51.61</td>\n",
395
+ " <td>188.8</td>\n",
396
+ " </tr>\n",
397
+ " <tr>\n",
398
+ " <th>2072151</th>\n",
399
+ " <td>2020-12-31 23:59:00</td>\n",
400
+ " <td>68.8</td>\n",
401
+ " <td>20.0</td>\n",
402
+ " <td>71.7</td>\n",
403
+ " <td>20.0</td>\n",
404
+ " <td>70.4</td>\n",
405
+ " <td>20.0</td>\n",
406
+ " <td>68.6</td>\n",
407
+ " <td>35.0</td>\n",
408
+ " <td>71.4</td>\n",
409
+ " <td>...</td>\n",
410
+ " <td>68.0</td>\n",
411
+ " <td>68.0</td>\n",
412
+ " <td>68.0</td>\n",
413
+ " <td>65.0</td>\n",
414
+ " <td>67.0</td>\n",
415
+ " <td>13.994</td>\n",
416
+ " <td>13.528</td>\n",
417
+ " <td>4.11</td>\n",
418
+ " <td>51.61</td>\n",
419
+ " <td>188.8</td>\n",
420
+ " </tr>\n",
421
+ " <tr>\n",
422
+ " <th>2072152</th>\n",
423
+ " <td>2020-12-31 23:59:00</td>\n",
424
+ " <td>68.8</td>\n",
425
+ " <td>20.0</td>\n",
426
+ " <td>71.7</td>\n",
427
+ " <td>20.0</td>\n",
428
+ " <td>70.4</td>\n",
429
+ " <td>20.0</td>\n",
430
+ " <td>68.6</td>\n",
431
+ " <td>35.0</td>\n",
432
+ " <td>71.4</td>\n",
433
+ " <td>...</td>\n",
434
+ " <td>68.0</td>\n",
435
+ " <td>68.0</td>\n",
436
+ " <td>68.0</td>\n",
437
+ " <td>65.0</td>\n",
438
+ " <td>67.0</td>\n",
439
+ " <td>13.994</td>\n",
440
+ " <td>13.528</td>\n",
441
+ " <td>4.11</td>\n",
442
+ " <td>51.61</td>\n",
443
+ " <td>188.8</td>\n",
444
+ " </tr>\n",
445
+ " </tbody>\n",
446
+ "</table>\n",
447
+ "<p>2070713 rows × 529 columns</p>\n",
448
+ "</div>"
449
+ ],
450
+ "text/plain": [
451
+ " date zone_069_temp zone_069_fan_spd zone_068_temp \\\n",
452
+ "1440 2018-01-02 00:00:00 71.4 20.0 73.2 \n",
453
+ "1441 2018-01-02 00:01:00 71.4 20.0 73.2 \n",
454
+ "1442 2018-01-02 00:02:00 71.4 20.0 73.2 \n",
455
+ "1443 2018-01-02 00:03:00 71.4 20.0 73.2 \n",
456
+ "1444 2018-01-02 00:04:00 71.4 20.0 73.2 \n",
457
+ "... ... ... ... ... \n",
458
+ "2072148 2020-12-31 23:57:00 68.8 20.0 71.7 \n",
459
+ "2072149 2020-12-31 23:58:00 68.8 20.0 71.7 \n",
460
+ "2072150 2020-12-31 23:58:00 68.8 20.0 71.7 \n",
461
+ "2072151 2020-12-31 23:59:00 68.8 20.0 71.7 \n",
462
+ "2072152 2020-12-31 23:59:00 68.8 20.0 71.7 \n",
463
+ "\n",
464
+ " zone_068_fan_spd zone_067_temp zone_067_fan_spd zone_066_temp \\\n",
465
+ "1440 70.0 71.2 20.0 70.4 \n",
466
+ "1441 70.0 71.2 20.0 70.4 \n",
467
+ "1442 70.0 71.2 20.0 70.4 \n",
468
+ "1443 70.0 71.2 20.0 70.4 \n",
469
+ "1444 70.0 71.2 20.0 70.4 \n",
470
+ "... ... ... ... ... \n",
471
+ "2072148 20.0 70.4 20.0 68.6 \n",
472
+ "2072149 20.0 70.4 20.0 68.6 \n",
473
+ "2072150 20.0 70.4 20.0 68.6 \n",
474
+ "2072151 20.0 70.4 20.0 68.6 \n",
475
+ "2072152 20.0 70.4 20.0 68.6 \n",
476
+ "\n",
477
+ " zone_066_fan_spd zone_042_temp ... zone_066_heating_sp \\\n",
478
+ "1440 35.0 71.6 ... NaN \n",
479
+ "1441 35.0 71.6 ... NaN \n",
480
+ "1442 35.0 71.6 ... NaN \n",
481
+ "1443 35.0 71.6 ... NaN \n",
482
+ "1444 35.0 71.6 ... NaN \n",
483
+ "... ... ... ... ... \n",
484
+ "2072148 35.0 71.4 ... 68.0 \n",
485
+ "2072149 35.0 71.4 ... 68.0 \n",
486
+ "2072150 35.0 71.4 ... 68.0 \n",
487
+ "2072151 35.0 71.4 ... 68.0 \n",
488
+ "2072152 35.0 71.4 ... 68.0 \n",
489
+ "\n",
490
+ " zone_067_heating_sp zone_069_heating_sp zone_070_heating_sp \\\n",
491
+ "1440 NaN NaN NaN \n",
492
+ "1441 NaN NaN NaN \n",
493
+ "1442 NaN NaN NaN \n",
494
+ "1443 NaN NaN NaN \n",
495
+ "1444 NaN NaN NaN \n",
496
+ "... ... ... ... \n",
497
+ "2072148 68.0 68.0 65.0 \n",
498
+ "2072149 68.0 68.0 65.0 \n",
499
+ "2072150 68.0 68.0 65.0 \n",
500
+ "2072151 68.0 68.0 65.0 \n",
501
+ "2072152 68.0 68.0 65.0 \n",
502
+ "\n",
503
+ " zone_071_heating_sp air_temp_set_1 air_temp_set_2 \\\n",
504
+ "1440 NaN 15.280 15.100 \n",
505
+ "1441 NaN 15.280 15.100 \n",
506
+ "1442 NaN 15.280 15.100 \n",
507
+ "1443 NaN 15.280 15.100 \n",
508
+ "1444 NaN 15.280 15.100 \n",
509
+ "... ... ... ... \n",
510
+ "2072148 67.0 13.994 13.528 \n",
511
+ "2072149 67.0 13.994 13.528 \n",
512
+ "2072150 67.0 13.994 13.528 \n",
513
+ "2072151 67.0 13.994 13.528 \n",
514
+ "2072152 67.0 13.994 13.528 \n",
515
+ "\n",
516
+ " dew_point_temperature_set_1d relative_humidity_set_1 \\\n",
517
+ "1440 6.33 55.40 \n",
518
+ "1441 6.33 55.40 \n",
519
+ "1442 6.33 55.40 \n",
520
+ "1443 6.33 55.40 \n",
521
+ "1444 6.33 55.40 \n",
522
+ "... ... ... \n",
523
+ "2072148 4.11 51.61 \n",
524
+ "2072149 4.11 51.61 \n",
525
+ "2072150 4.11 51.61 \n",
526
+ "2072151 4.11 51.61 \n",
527
+ "2072152 4.11 51.61 \n",
528
+ "\n",
529
+ " solar_radiation_set_1 \n",
530
+ "1440 161.9 \n",
531
+ "1441 161.9 \n",
532
+ "1442 161.9 \n",
533
+ "1443 161.9 \n",
534
+ "1444 161.9 \n",
535
+ "... ... \n",
536
+ "2072148 188.8 \n",
537
+ "2072149 188.8 \n",
538
+ "2072150 188.8 \n",
539
+ "2072151 188.8 \n",
540
+ "2072152 188.8 \n",
541
+ "\n",
542
+ "[2070713 rows x 529 columns]"
543
+ ]
544
+ },
545
+ "execution_count": 7,
546
+ "metadata": {},
547
+ "output_type": "execute_result"
548
+ }
549
+ ],
550
+ "source": [
551
+ "df_filtered"
552
+ ]
553
+ },
554
+ {
555
+ "cell_type": "code",
556
+ "execution_count": 8,
557
+ "metadata": {},
558
+ "outputs": [
559
+ {
560
+ "data": {
561
+ "text/plain": [
562
+ "['zone_070_heating_sp',\n",
563
+ " 'zone_070_heating_sp',\n",
564
+ " 'zone_070_heating_sp',\n",
565
+ " 'zone_070_heating_sp',\n",
566
+ " 'zone_070_heating_sp',\n",
567
+ " 'zone_070_heating_sp',\n",
568
+ " 'zone_070_heating_sp',\n",
569
+ " 'zone_070_heating_sp',\n",
570
+ " 'zone_070_heating_sp',\n",
571
+ " 'zone_070_heating_sp',\n",
572
+ " 'zone_070_heating_sp',\n",
573
+ " 'zone_070_heating_sp']"
574
+ ]
575
+ },
576
+ "execution_count": 8,
577
+ "metadata": {},
578
+ "output_type": "execute_result"
579
+ }
580
+ ],
581
+ "source": [
582
+ "# Chronological split by calendar day, both bounds exclusive:\n",
583
+ "#   train = 2019-03-02 .. 2019-04-30, test = 2019-05-02 .. 2019-06-30.\n",
584
+ "sample_day = df_filtered.date.dt.date\n",
585
+ "traindataset_df = df_filtered[(sample_day > date(2019, 3, 1)) & (sample_day < date(2019, 5, 1))]\n",
586
+ "testdataset_df = df_filtered[(sample_day > date(2019, 5, 1)) & (sample_day < date(2019, 7, 1))]\n",
587
+ "\n",
588
+ "def _smoothed_values(frame):\n",
589
+ "    \"\"\"Drop the timestamp column and return a 5-row rolling mean (shorter at the start) as an ndarray.\"\"\"\n",
590
+ "    return frame.drop(columns=[\"date\"]).rolling(window = 5, step = 1, min_periods= 1).mean().values\n",
591
+ "\n",
592
+ "traindataset = _smoothed_values(traindataset_df)\n",
593
+ "testdataset = _smoothed_values(testdataset_df)\n",
594
+ "\n",
595
+ "# Names of the training columns that contain at least one missing value.\n",
596
+ "columns_with_na = traindataset_df.columns[traindataset_df.isna().any()].tolist()\n",
597
+ "columns_with_na"
592
+ ]
593
+ },
594
+ {
595
+ "cell_type": "code",
596
+ "execution_count": 9,
597
+ "metadata": {},
598
+ "outputs": [
599
+ {
600
+ "data": {
601
+ "text/plain": [
602
+ "Index(['date', 'zone_069_temp', 'zone_069_fan_spd', 'zone_068_temp',\n",
603
+ " 'zone_068_fan_spd', 'zone_067_temp', 'zone_067_fan_spd',\n",
604
+ " 'zone_066_temp', 'zone_066_fan_spd', 'zone_042_temp',\n",
605
+ " ...\n",
606
+ " 'zone_066_heating_sp', 'zone_067_heating_sp', 'zone_069_heating_sp',\n",
607
+ " 'zone_070_heating_sp', 'zone_071_heating_sp', 'air_temp_set_1',\n",
608
+ " 'air_temp_set_2', 'dew_point_temperature_set_1d',\n",
609
+ " 'relative_humidity_set_1', 'solar_radiation_set_1'],\n",
610
+ " dtype='object', length=529)"
611
+ ]
612
+ },
613
+ "execution_count": 9,
614
+ "metadata": {},
615
+ "output_type": "execute_result"
616
+ }
617
+ ],
618
+ "source": [
619
+ "traindataset_df.columns"
620
+ ]
621
+ },
622
+ {
623
+ "cell_type": "code",
624
+ "execution_count": 123,
625
+ "metadata": {},
626
+ "outputs": [
627
+ {
628
+ "name": "stdout",
629
+ "output_type": "stream",
630
+ "text": [
631
+ "0 0\n"
632
+ ]
633
+ }
634
+ ],
635
+ "source": [
636
+ "print(traindataset_df.isna().sum().sum(), testdataset_df.isna().sum().sum())"
637
+ ]
638
+ },
639
+ {
640
+ "cell_type": "code",
641
+ "execution_count": 124,
642
+ "metadata": {},
643
+ "outputs": [
644
+ {
645
+ "data": {
646
+ "text/plain": [
647
+ "(86400, 86400)"
648
+ ]
649
+ },
650
+ "execution_count": 124,
651
+ "metadata": {},
652
+ "output_type": "execute_result"
653
+ }
654
+ ],
655
+ "source": [
656
+ "len(traindataset), len(testdataset)"
657
+ ]
658
+ },
659
+ {
660
+ "cell_type": "code",
661
+ "execution_count": 125,
662
+ "metadata": {},
663
+ "outputs": [],
664
+ "source": [
665
+ "# Standardise every column with statistics estimated on the training split only;\n",
666
+ "# the test split is transformed with those same statistics.\n",
667
+ "scaler = StandardScaler()\n",
668
+ "traindataset = scaler.fit_transform(traindataset.astype('float32'))\n",
669
+ "testdataset = scaler.transform(testdataset.astype('float32'))"
671
+ ]
672
+ },
673
+ {
674
+ "cell_type": "code",
675
+ "execution_count": 126,
676
+ "metadata": {},
677
+ "outputs": [
678
+ {
679
+ "data": {
680
+ "text/plain": [
681
+ "(86400, 45)"
682
+ ]
683
+ },
684
+ "execution_count": 126,
685
+ "metadata": {},
686
+ "output_type": "execute_result"
687
+ }
688
+ ],
689
+ "source": [
690
+ "traindataset.shape"
691
+ ]
692
+ },
693
+ {
694
+ "cell_type": "code",
695
+ "execution_count": 127,
696
+ "metadata": {},
697
+ "outputs": [],
698
+ "source": [
699
+ "train,test = traindataset,testdataset\n",
700
+ "\n",
701
+ "def create_dataset(dataset, time_step, n_exog=5):\n",
702
+ "    \"\"\"Turn a 2-D series into supervised (window, next-row target) pairs.\n",
703
+ "\n",
704
+ "    Args:\n",
705
+ "        dataset: array of shape (n_rows, n_features), rows in time order.\n",
706
+ "        time_step: number of consecutive rows in each input window.\n",
707
+ "        n_exog: count of trailing columns that stay in the inputs but are left out of\n",
708
+ "            the targets (default 5, the slice this notebook has always used).\n",
709
+ "\n",
710
+ "    Returns:\n",
711
+ "        (x, Y): x has shape (n_windows, time_step, n_features) and Y has shape\n",
712
+ "        (n_windows, n_features - n_exog); Y[i] is the row right after window x[i].\n",
713
+ "    \"\"\"\n",
714
+ "    dataset = np.asarray(dataset)\n",
715
+ "    n_features = dataset.shape[1]\n",
716
+ "    target_end = max(n_features - n_exog, 0)\n",
717
+ "    # Every row with index >= time_step is a valid target. The previous bound\n",
718
+ "    # (len - time_step - 1) silently dropped the final window.\n",
719
+ "    n_windows = max(len(dataset) - time_step, 0)\n",
720
+ "    if n_windows == 0:\n",
721
+ "        # Too short for a single window: return empty arrays that still carry the right trailing shape.\n",
722
+ "        return (np.empty((0, time_step, n_features), dtype=dataset.dtype),\n",
723
+ "                np.empty((0, target_end), dtype=dataset.dtype))\n",
724
+ "    x = np.stack([dataset[i:i + time_step, :] for i in range(n_windows)])\n",
725
+ "    Y = np.stack([dataset[i + time_step, :target_end] for i in range(n_windows)])\n",
726
+ "    return x, Y\n",
710
+ "time_step = 30\n",
711
+ "X_train, y_train = create_dataset(train, time_step)\n",
712
+ "X_test, y_test = create_dataset(test, time_step)\n",
713
+ "\n"
714
+ ]
715
+ },
716
+ {
717
+ "cell_type": "code",
718
+ "execution_count": 128,
719
+ "metadata": {},
720
+ "outputs": [
721
+ {
722
+ "data": {
723
+ "text/plain": [
724
+ "((86369, 30, 45), (86369, 40))"
725
+ ]
726
+ },
727
+ "execution_count": 128,
728
+ "metadata": {},
729
+ "output_type": "execute_result"
730
+ }
731
+ ],
732
+ "source": [
733
+ "X_train.shape, y_train.shape"
734
+ ]
735
+ },
736
+ {
737
+ "cell_type": "code",
738
+ "execution_count": 133,
739
+ "metadata": {},
740
+ "outputs": [
741
+ {
742
+ "name": "stdout",
743
+ "output_type": "stream",
744
+ "text": [
745
+ "Epoch 1/5\n",
746
+ "674/675 [============================>.] - ETA: 0s - loss: 0.1090\n",
747
+ "Epoch 1: val_loss improved from inf to 0.26433, saving model to lstm_vav_01.tf\n",
748
+ "INFO:tensorflow:Assets written to: lstm_vav_01.tf\\assets\n"
749
+ ]
750
+ },
751
+ {
752
+ "name": "stderr",
753
+ "output_type": "stream",
754
+ "text": [
755
+ "INFO:tensorflow:Assets written to: lstm_vav_01.tf\\assets\n"
756
+ ]
757
+ },
758
+ {
759
+ "name": "stdout",
760
+ "output_type": "stream",
761
+ "text": [
762
+ "675/675 [==============================] - 61s 84ms/step - loss: 0.1089 - val_loss: 0.2643\n",
763
+ "Epoch 2/5\n",
764
+ "675/675 [==============================] - ETA: 0s - loss: 0.0155\n",
765
+ "Epoch 2: val_loss improved from 0.26433 to 0.21391, saving model to lstm_vav_01.tf\n",
766
+ "INFO:tensorflow:Assets written to: lstm_vav_01.tf\\assets\n"
767
+ ]
768
+ },
769
+ {
770
+ "name": "stderr",
771
+ "output_type": "stream",
772
+ "text": [
773
+ "INFO:tensorflow:Assets written to: lstm_vav_01.tf\\assets\n"
774
+ ]
775
+ },
776
+ {
777
+ "name": "stdout",
778
+ "output_type": "stream",
779
+ "text": [
780
+ "675/675 [==============================] - 45s 67ms/step - loss: 0.0155 - val_loss: 0.2139\n",
781
+ "Epoch 3/5\n",
782
+ "675/675 [==============================] - ETA: 0s - loss: 0.0081\n",
783
+ "Epoch 3: val_loss improved from 0.21391 to 0.17155, saving model to lstm_vav_01.tf\n",
784
+ "INFO:tensorflow:Assets written to: lstm_vav_01.tf\\assets\n"
785
+ ]
786
+ },
787
+ {
788
+ "name": "stderr",
789
+ "output_type": "stream",
790
+ "text": [
791
+ "INFO:tensorflow:Assets written to: lstm_vav_01.tf\\assets\n"
792
+ ]
793
+ },
794
+ {
795
+ "name": "stdout",
796
+ "output_type": "stream",
797
+ "text": [
798
+ "675/675 [==============================] - 58s 86ms/step - loss: 0.0081 - val_loss: 0.1716\n",
799
+ "Epoch 4/5\n",
800
+ "675/675 [==============================] - ETA: 0s - loss: 0.0049\n",
801
+ "Epoch 4: val_loss improved from 0.17155 to 0.14438, saving model to lstm_vav_01.tf\n",
802
+ "INFO:tensorflow:Assets written to: lstm_vav_01.tf\\assets\n"
803
+ ]
804
+ },
805
+ {
806
+ "name": "stderr",
807
+ "output_type": "stream",
808
+ "text": [
809
+ "INFO:tensorflow:Assets written to: lstm_vav_01.tf\\assets\n"
810
+ ]
811
+ },
812
+ {
813
+ "name": "stdout",
814
+ "output_type": "stream",
815
+ "text": [
816
+ "675/675 [==============================] - 54s 80ms/step - loss: 0.0049 - val_loss: 0.1444\n",
817
+ "Epoch 5/5\n",
818
+ "675/675 [==============================] - ETA: 0s - loss: 0.0030\n",
819
+ "Epoch 5: val_loss improved from 0.14438 to 0.12414, saving model to lstm_vav_01.tf\n",
820
+ "INFO:tensorflow:Assets written to: lstm_vav_01.tf\\assets\n"
821
+ ]
822
+ },
823
+ {
824
+ "name": "stderr",
825
+ "output_type": "stream",
826
+ "text": [
827
+ "INFO:tensorflow:Assets written to: lstm_vav_01.tf\\assets\n"
828
+ ]
829
+ },
830
+ {
831
+ "name": "stdout",
832
+ "output_type": "stream",
833
+ "text": [
834
+ "675/675 [==============================] - 60s 89ms/step - loss: 0.0030 - val_loss: 0.1241\n"
835
+ ]
836
+ },
837
+ {
838
+ "data": {
839
+ "text/plain": [
840
+ "<keras.src.callbacks.History at 0x1d5bf064950>"
841
+ ]
842
+ },
843
+ "execution_count": 133,
844
+ "metadata": {},
845
+ "output_type": "execute_result"
846
+ }
847
+ ],
848
+ "source": [
849
+ "# Three stacked LSTM layers (50 -> 50 -> 30 units) feeding a linear read-out with one unit per target column.\n",
850
+ "model = Sequential([\n",
851
+ "    LSTM(units=50, return_sequences=True, input_shape=(X_train.shape[1], X_train.shape[2])),\n",
852
+ "    LSTM(units=50, return_sequences=True),\n",
853
+ "    LSTM(units=30),\n",
854
+ "    Dense(units=y_train.shape[1]),\n",
855
+ "])\n",
856
+ "model.compile(optimizer='adam', loss='mean_squared_error')\n",
857
+ "\n",
858
+ "# Only the epoch with the lowest validation loss is kept on disk.\n",
859
+ "# NOTE(review): validation_data is the test split, so checkpoint selection sees the test data - confirm this is intended.\n",
860
+ "checkpoint_path = \"lstm_vav_01.tf\"\n",
861
+ "checkpoint_callback = ModelCheckpoint(filepath=checkpoint_path, monitor='val_loss', verbose=1, save_best_only=True, mode='min')\n",
862
+ "model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=5, batch_size=128, verbose=1, callbacks=[checkpoint_callback])"
861
+ ]
862
+ },
863
+ {
864
+ "cell_type": "code",
865
+ "execution_count": 134,
866
+ "metadata": {},
867
+ "outputs": [
868
+ {
869
+ "data": {
870
+ "text/plain": [
871
+ "<tensorflow.python.checkpoint.checkpoint.CheckpointLoadStatus at 0x1d55c631f10>"
872
+ ]
873
+ },
874
+ "execution_count": 134,
875
+ "metadata": {},
876
+ "output_type": "execute_result"
877
+ }
878
+ ],
879
+ "source": [
880
+ "model.load_weights(checkpoint_path)"
881
+ ]
882
+ },
883
+ {
884
+ "cell_type": "code",
885
+ "execution_count": 135,
886
+ "metadata": {},
887
+ "outputs": [
888
+ {
889
+ "name": "stdout",
890
+ "output_type": "stream",
891
+ "text": [
892
+ "2700/2700 [==============================] - 25s 9ms/step\n"
893
+ ]
894
+ }
895
+ ],
896
+ "source": [
897
+ "test_predict1 = model.predict(X_test)"
898
+ ]
899
+ },
900
+ {
901
+ "cell_type": "code",
902
+ "execution_count": 136,
903
+ "metadata": {},
904
+ "outputs": [
905
+ {
906
+ "data": {
907
+ "text/plain": [
908
+ "[<matplotlib.lines.Line2D at 0x1d5582d61d0>]"
909
+ ]
910
+ },
911
+ "execution_count": 136,
912
+ "metadata": {},
913
+ "output_type": "execute_result"
914
+ }
915
+ ],
916
+ "source": [
917
+ "plt.plot(y_test[:,3])\n",
918
+ "plt.plot(y_train[:,3])"
919
+ ]
920
+ },
921
+ {
922
+ "cell_type": "code",
923
+ "execution_count": 141,
924
+ "metadata": {},
925
+ "outputs": [],
926
+ "source": [
927
+ "# Qt backend: figures open in a separate interactive window, so this needs a desktop session with Qt bindings.\n",
928
+ "%matplotlib qt\n",
929
+ "var = 1  # index of the target column to inspect\n",
930
+ "ANOMALY_THRESHOLD = 0.38  # absolute residual (standardised units) above which a sample is flagged\n",
931
+ "\n",
932
+ "plt.plot(y_test[:,var], label='Original Testing Data', color='blue')\n",
933
+ "plt.plot(test_predict1[:,var], label='Predicted Testing Data', color='red',alpha=0.8)\n",
934
+ "\n",
935
+ "# np.where(condition) returns a 1-tuple of index arrays. `anomalies` keeps that form; the plot uses the\n",
936
+ "# flat index array inside it, so x and y are both plain 1-D arrays of equal length.\n",
937
+ "anomalies = np.where(np.abs(test_predict1[:,var] - y_test[:,var]) > ANOMALY_THRESHOLD)\n",
938
+ "anomaly_idx = anomalies[0]\n",
939
+ "plt.scatter(anomaly_idx, test_predict1[anomaly_idx,var], color='black', marker=\"o\", s=100, label='Anomalies')\n",
940
+ "\n",
941
+ "plt.title('Testing Data - Predicted vs Actual')\n",
942
+ "plt.xlabel('Time')\n",
943
+ "plt.ylabel('Value')\n",
944
+ "plt.legend()\n",
945
+ "plt.show()"
940
+ ]
941
+ },
942
+ {
943
+ "cell_type": "code",
944
+ "execution_count": 18,
945
+ "metadata": {},
946
+ "outputs": [],
947
+ "source": [
948
+ "from sklearn.mixture import GaussianMixture\n",
949
+ "import numpy as np\n",
950
+ "import matplotlib.pyplot as plt\n",
951
+ "from sklearn.decomposition import PCA\n",
952
+ "\n",
953
+ "# Cluster the prediction residuals (not random data): project them onto their first two\n",
954
+ "# principal components, then fit a 2-component Gaussian mixture in that plane.\n",
955
+ "np.random.seed(0)  # kept so any later code relying on the global NumPy RNG is unaffected\n",
956
+ "residuals = test_predict1 - y_test\n",
957
+ "\n",
958
+ "# random_state pins each estimator directly, so the result does not depend on how much of the\n",
959
+ "# global random stream other code consumed first.\n",
960
+ "pca = PCA(n_components=2, random_state=0)\n",
961
+ "X = pca.fit_transform(residuals)\n",
962
+ "\n",
963
+ "gmm = GaussianMixture(n_components=2, random_state=0)\n",
964
+ "gmm.fit(X)\n",
965
+ "\n",
966
+ "# Most probable mixture component for every sample.\n",
967
+ "labels = gmm.predict(X)\n",
968
+ "\n",
969
+ "# The axes are principal components of the residuals, not original features.\n",
970
+ "plt.scatter(X[:, 0], X[:, 1], c=labels, cmap='viridis', alpha=0.5)\n",
971
+ "plt.title('GMM Clustering')\n",
972
+ "plt.xlabel('Principal component 1')\n",
973
+ "plt.ylabel('Principal component 2')\n",
974
+ "plt.show()\n"
977
+ ]
978
+ },
979
+ {
980
+ "cell_type": "code",
981
+ "execution_count": 19,
982
+ "metadata": {},
983
+ "outputs": [
984
+ {
985
+ "ename": "ValueError",
986
+ "evalue": "operands could not be broadcast together with shapes (199403,51) (8,) ",
987
+ "output_type": "error",
988
+ "traceback": [
989
+ "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
990
+ "\u001b[1;31mValueError\u001b[0m Traceback (most recent call last)",
991
+ "Cell \u001b[1;32mIn[19], line 6\u001b[0m\n\u001b[0;32m 4\u001b[0m \u001b[38;5;66;03m# Generating random data for demonstration\u001b[39;00m\n\u001b[0;32m 5\u001b[0m np\u001b[38;5;241m.\u001b[39mrandom\u001b[38;5;241m.\u001b[39mseed(\u001b[38;5;241m0\u001b[39m)\n\u001b[1;32m----> 6\u001b[0m X \u001b[38;5;241m=\u001b[39m \u001b[43m(\u001b[49m\u001b[43mtest_predict1\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m-\u001b[39;49m\u001b[43m \u001b[49m\u001b[43my_test\u001b[49m\u001b[43m)\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43m \u001b[49m\u001b[43mscaler\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mvar_\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;241;43m0\u001b[39;49m\u001b[43m:\u001b[49m\u001b[38;5;241;43m8\u001b[39;49m\u001b[43m]\u001b[49m \u001b[38;5;241m+\u001b[39m scaler\u001b[38;5;241m.\u001b[39mmean_[\u001b[38;5;241m0\u001b[39m:\u001b[38;5;241m8\u001b[39m]\n\u001b[0;32m 8\u001b[0m k \u001b[38;5;241m=\u001b[39m \u001b[38;5;241m6\u001b[39m\n\u001b[0;32m 10\u001b[0m kmeans \u001b[38;5;241m=\u001b[39m KMeans(n_clusters\u001b[38;5;241m=\u001b[39mk)\n",
992
+ "\u001b[1;31mValueError\u001b[0m: operands could not be broadcast together with shapes (199403,51) (8,) "
993
+ ]
994
+ }
995
+ ],
996
+ "source": [
997
+ "from sklearn.cluster import KMeans\n",
998
+ "from sklearn.decomposition import PCA\n",
999
+ "import numpy as np\n",
1000
+ "import matplotlib.pyplot as plt\n",
1001
+ "\n",
1002
+ "# Cluster the prediction residuals (test_predict1 - y_test); this is model output, not random data.\n",
1003
+ "# Seed NumPy's global RNG: KMeans below has no random_state and draws its initialisation from it.\n",
1004
+ "np.random.seed(0)\n",
1005
+ "X = (test_predict1 - y_test)\n",
1006
+ "\n",
1007
+ "k = 6\n",
1008
+ "kmeans = KMeans(n_clusters=k)\n",
1009
+ "kmeans.fit(X)\n",
1010
+ "\n",
1011
+ "# PCA is used for the 2-D plot only; the clusters above were fitted on every column.\n",
1012
+ "pca = PCA(n_components=2)\n",
1013
+ "X = pca.fit_transform(X)\n",
1014
+ "\n",
1015
+ "# Getting the cluster centers (projected into the same PCA plane) and labels\n",
1016
+ "centroids = kmeans.cluster_centers_\n",
1017
+ "centroids = pca.transform(centroids)\n",
1018
+ "labels = kmeans.labels_\n",
1019
+ "\n",
1020
+ "# Plotting the data points and cluster centers\n",
1021
+ "plt.scatter(X[:, 0], X[:, 1], c=labels, cmap='viridis', alpha=0.5)\n",
1022
+ "plt.scatter(centroids[:, 0], centroids[:, 1], marker='x', c='red', s=200, linewidths=2)\n",
1023
+ "plt.title('KMeans Clustering')\n",
1024
+ "# The axes are principal components, so they are labelled as such rather than as raw features.\n",
1025
+ "plt.xlabel('PCA component 1')\n",
1026
+ "plt.ylabel('PCA component 2')\n",
1027
+ "plt.show()\n"
1028
+ ]
1029
+ },
1030
+ {
1031
+ "cell_type": "code",
1032
+ "execution_count": null,
1033
+ "metadata": {},
1034
+ "outputs": [],
1035
+ "source": [
1036
+ "# Summarise the residuals over consecutive windows of k rows: per-column mean, std, max, min and IQR.\n",
1037
+ "k = 60\n",
1038
+ "X = test_predict1 - y_test\n",
1039
+ "window_features = []\n",
1040
+ "for i in range(0, len(X), k):  # step by k (was a literal 60) so the windows tile X without overlap\n",
1041
+ "    window = X[i:i+k]  # the last window is shorter when len(X) is not a multiple of k\n",
1042
+ "    q75, q25 = np.percentile(window, [75, 25], axis=0)\n",
1043
+ "    # Nothing is bound to the names max/min: notebook variables are global and would hide the builtins.\n",
1044
+ "    window_features.append(np.concatenate([\n",
1045
+ "        window.mean(axis=0), window.std(axis=0), window.max(axis=0), window.min(axis=0), q75 - q25\n",
1046
+ "    ]))\n",
1047
+ "# One row per window; each row is the five statistics laid end to end (assumes X is 2-D - TODO confirm).\n",
1048
+ "processed_data = np.stack(window_features, axis=0)"
1049
+ ]
1050
+ },
1051
+ {
1052
+ "cell_type": "code",
1053
+ "execution_count": null,
1054
+ "metadata": {},
1055
+ "outputs": [],
1056
+ "source": [
1057
+ "from sklearn.decomposition import PCA\n",
1058
+ "\n",
1059
+ "# Cluster the windowed residual statistics held in processed_data.\n",
1060
+ "X = processed_data\n",
1061
+ "# random_state pins the centroid initialisation so re-running this cell reproduces the same clusters.\n",
1062
+ "kmeans = KMeans(n_clusters=3, algorithm='elkan', max_iter=1000, n_init=5, random_state=0)\n",
1063
+ "kmeans.fit(X)\n",
1064
+ "\n",
1065
+ "# PCA is used for the 2-D plot only; the clusters above were fitted on every column.\n",
1066
+ "pca = PCA(n_components=2)\n",
1067
+ "X = pca.fit_transform(X)\n",
1068
+ "# Getting the cluster centers (projected into the same PCA plane) and labels\n",
1069
+ "centroids = kmeans.cluster_centers_\n",
1070
+ "centroids = pca.transform(centroids)\n",
1071
+ "labels = kmeans.labels_\n",
1072
+ "# Plotting the data points and cluster centers; the axes are principal components, not raw features\n",
1073
+ "plt.scatter(X[:, 0], X[:, 1], c=labels, cmap='viridis', alpha=0.5)\n",
1074
+ "plt.scatter(centroids[:, 0], centroids[:, 1], marker='x', c='red', s=200, linewidths=2)\n",
1075
+ "plt.title('KMeans Clustering')\n",
1076
+ "plt.xlabel('PCA component 1')\n",
1077
+ "plt.ylabel('PCA component 2')\n",
1078
+ "plt.show()\n"
1079
+ ]
1080
+ },
1081
+ {
1082
+ "cell_type": "code",
1083
+ "execution_count": null,
1084
+ "metadata": {},
1085
+ "outputs": [],
1086
+ "source": [
1087
+ "import matplotlib.pyplot as plt\n",
1088
+ "import numpy as np\n",
1089
+ "from sklearn.decomposition import PCA\n",
1090
+ "from sklearn.mixture import GaussianMixture\n",
1091
+ "\n",
1092
+ "# Fix NumPy's global seed; the mixture below is created without its own random_state.\n",
1093
+ "np.random.seed(0)\n",
1094
+ "\n",
1095
+ "# Input is processed_data as left by an earlier cell, not freshly generated data.\n",
1096
+ "X = processed_data\n",
1097
+ "\n",
1098
+ "# Three-component Gaussian mixture, initialised with k-means++\n",
1099
+ "gmm = GaussianMixture(init_params='k-means++', n_components=3)\n",
1100
+ "\n",
1101
+ "# Fit on the full-width input, then assign every row a component label\n",
1102
+ "labels = gmm.fit(X).predict(X)\n",
1103
+ "\n",
1104
+ "# Two principal components, used only to draw the scatter\n",
1105
+ "pca = PCA(n_components=2)\n",
1106
+ "X = pca.fit_transform(X)\n",
1107
+ "\n",
1108
+ "# Projected rows coloured by their mixture component\n",
1109
+ "# NOTE(review): the axis labels read Feature 1/2 although the plotted values are PCA components.\n",
1110
+ "fig, ax = plt.subplots()\n",
1111
+ "ax.scatter(X[:, 0], X[:, 1], c=labels, cmap='viridis', alpha=0.5)\n",
1112
+ "ax.set_title('GMM Clustering')\n",
1113
+ "ax.set_xlabel('Feature 1')\n",
1114
+ "ax.set_ylabel('Feature 2')\n",
1115
+ "plt.show()\n",
1116
+ "\n"
1117
+ ]
1118
+ },
1119
+ {
1120
+ "cell_type": "code",
1121
+ "execution_count": null,
1122
+ "metadata": {},
1123
+ "outputs": [],
1124
+ "source": [
1125
+ "from sklearn.cluster import KMeans\n",
1126
+ "from sklearn.decomposition import PCA\n",
1127
+ "import numpy as np\n",
1128
+ "import matplotlib.pyplot as plt\n",
1129
+ "# Seed NumPy's global RNG: KMeans below has no random_state and draws its initialisation from it.\n",
1130
+ "np.random.seed(0)\n",
1131
+ "# Residuals in original units: StandardScaler's inverse is x * scale_ + mean_ (scale_ is the std, var_ its square), and the mean cancels in a difference.\n",
1132
+ "# NOTE(review): assumes the first residuals.shape[1] scaler columns are the predicted targets, as the old [0:8] slice did - confirm.\n",
1133
+ "residuals = test_predict1 - y_test\n",
1134
+ "X = residuals * scaler.scale_[:residuals.shape[1]]\n",
1135
+ "\n",
1136
+ "k = 6\n",
1137
+ "kmeans = KMeans(n_clusters=k)\n",
1138
+ "kmeans.fit(X)\n",
1139
+ "\n",
1140
+ "# PCA is used for the 2-D plot only; the clusters above were fitted on every column.\n",
1141
+ "pca = PCA(n_components=2)\n",
1142
+ "X = pca.fit_transform(X)\n",
1143
+ "\n",
1144
+ "# Getting the cluster centers (projected into the same PCA plane) and labels\n",
1145
+ "centroids = kmeans.cluster_centers_\n",
1146
+ "centroids = pca.transform(centroids)\n",
1147
+ "labels = kmeans.labels_\n",
1148
+ "# Plotting the data points and cluster centers; the axes are principal components, not raw features\n",
1149
+ "plt.scatter(X[:, 0], X[:, 1], c=labels, cmap='viridis', alpha=0.5)\n",
1150
+ "plt.scatter(centroids[:, 0], centroids[:, 1], marker='x', c='red', s=200, linewidths=2)\n",
1151
+ "plt.title('KMeans Clustering')\n",
1152
+ "plt.xlabel('PCA component 1')\n",
1153
+ "plt.ylabel('PCA component 2')\n",
1154
+ "plt.show()\n"
1155
+ ]
1156
+ },
1157
+ {
1158
+ "cell_type": "code",
1159
+ "execution_count": null,
1160
+ "metadata": {},
1161
+ "outputs": [],
1162
+ "source": []
1163
+ }
1164
+ ],
1165
+ "metadata": {
1166
+ "kernelspec": {
1167
+ "display_name": "tensorflow",
1168
+ "language": "python",
1169
+ "name": "python3"
1170
+ },
1171
+ "language_info": {
1172
+ "codemirror_mode": {
1173
+ "name": "ipython",
1174
+ "version": 3
1175
+ },
1176
+ "file_extension": ".py",
1177
+ "mimetype": "text/x-python",
1178
+ "name": "python",
1179
+ "nbconvert_exporter": "python",
1180
+ "pygments_lexer": "ipython3",
1181
+ "version": "3.11.8"
1182
+ }
1183
+ },
1184
+ "nbformat": 4,
1185
+ "nbformat_minor": 2
1186
+ }