akshayballal committed on
Commit
8ced29b
1 Parent(s): d81a75f

Add .tf and data files to .gitignore

Browse files
Files changed (2) hide show
  1. .gitignore +2 -0
  2. physLSTM/full_lstm.ipynb +1190 -0
.gitignore CHANGED
@@ -2,3 +2,5 @@ venv
2
  .venv
3
  .vscode
4
  __pycache__/
 
 
 
2
  .venv
3
  .vscode
4
  __pycache__/
5
+ *.tf
6
+ data
physLSTM/full_lstm.ipynb ADDED
@@ -0,0 +1,1190 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 56,
6
+ "metadata": {},
7
+ "outputs": [],
8
+ "source": [
9
+ "import pandas as pd \n",
10
+ "from datetime import datetime \n",
11
+ "from datetime import date\n",
12
+ "import matplotlib.pyplot as plt\n",
13
+ "import numpy as np\n",
14
+ "import pandas as pd\n",
15
+ "from keras.models import Sequential\n",
16
+ "from keras.layers import LSTM, Dense\n",
17
+ "from sklearn.model_selection import train_test_split\n",
18
+ "from sklearn.preprocessing import MinMaxScaler,StandardScaler\n",
19
+ "from keras.callbacks import ModelCheckpoint\n",
20
+ "import tensorflow as tf"
21
+ ]
22
+ },
23
+ {
24
+ "cell_type": "code",
25
+ "execution_count": 57,
26
+ "metadata": {},
27
+ "outputs": [
28
+ {
29
+ "data": {
30
+ "text/plain": [
31
+ "[]"
32
+ ]
33
+ },
34
+ "execution_count": 57,
35
+ "metadata": {},
36
+ "output_type": "execute_result"
37
+ }
38
+ ],
39
+ "source": [
40
+ "import tensorflow as tf\n",
41
+ "tf.config.list_physical_devices('GPU')"
42
+ ]
43
+ },
44
+ {
45
+ "cell_type": "code",
46
+ "execution_count": 58,
47
+ "metadata": {},
48
+ "outputs": [
49
+ {
50
+ "data": {
51
+ "text/html": [
52
+ "<div>\n",
53
+ "<style scoped>\n",
54
+ " .dataframe tbody tr th:only-of-type {\n",
55
+ " vertical-align: middle;\n",
56
+ " }\n",
57
+ "\n",
58
+ " .dataframe tbody tr th {\n",
59
+ " vertical-align: top;\n",
60
+ " }\n",
61
+ "\n",
62
+ " .dataframe thead th {\n",
63
+ " text-align: right;\n",
64
+ " }\n",
65
+ "</style>\n",
66
+ "<table border=\"1\" class=\"dataframe\">\n",
67
+ " <thead>\n",
68
+ " <tr style=\"text-align: right;\">\n",
69
+ " <th></th>\n",
70
+ " <th>date</th>\n",
71
+ " <th>zone_047_hw_valve</th>\n",
72
+ " <th>rtu_004_sat_sp_tn</th>\n",
73
+ " <th>zone_047_temp</th>\n",
74
+ " <th>zone_047_fan_spd</th>\n",
75
+ " <th>rtu_004_fltrd_sa_flow_tn</th>\n",
76
+ " <th>rtu_004_sa_temp</th>\n",
77
+ " <th>rtu_004_pa_static_stpt_tn</th>\n",
78
+ " <th>rtu_004_oa_flow_tn</th>\n",
79
+ " <th>rtu_004_oadmpr_pct</th>\n",
80
+ " <th>...</th>\n",
81
+ " <th>zone_047_heating_sp</th>\n",
82
+ " <th>Unnamed: 47_y</th>\n",
83
+ " <th>hvac_S</th>\n",
84
+ " <th>hp_hws_temp</th>\n",
85
+ " <th>aru_001_cwr_temp</th>\n",
86
+ " <th>aru_001_cws_fr_gpm</th>\n",
87
+ " <th>aru_001_cws_temp</th>\n",
88
+ " <th>aru_001_hwr_temp</th>\n",
89
+ " <th>aru_001_hws_fr_gpm</th>\n",
90
+ " <th>aru_001_hws_temp</th>\n",
91
+ " </tr>\n",
92
+ " </thead>\n",
93
+ " <tbody>\n",
94
+ " <tr>\n",
95
+ " <th>0</th>\n",
96
+ " <td>2018-01-01 00:00:00</td>\n",
97
+ " <td>100.0</td>\n",
98
+ " <td>69.0</td>\n",
99
+ " <td>67.5</td>\n",
100
+ " <td>20.0</td>\n",
101
+ " <td>9265.604</td>\n",
102
+ " <td>66.1</td>\n",
103
+ " <td>0.06</td>\n",
104
+ " <td>0.000000</td>\n",
105
+ " <td>28.0</td>\n",
106
+ " <td>...</td>\n",
107
+ " <td>NaN</td>\n",
108
+ " <td>NaN</td>\n",
109
+ " <td>NaN</td>\n",
110
+ " <td>75.3</td>\n",
111
+ " <td>NaN</td>\n",
112
+ " <td>NaN</td>\n",
113
+ " <td>NaN</td>\n",
114
+ " <td>NaN</td>\n",
115
+ " <td>NaN</td>\n",
116
+ " <td>NaN</td>\n",
117
+ " </tr>\n",
118
+ " <tr>\n",
119
+ " <th>1</th>\n",
120
+ " <td>2018-01-01 00:01:00</td>\n",
121
+ " <td>100.0</td>\n",
122
+ " <td>69.0</td>\n",
123
+ " <td>67.5</td>\n",
124
+ " <td>20.0</td>\n",
125
+ " <td>9265.604</td>\n",
126
+ " <td>66.0</td>\n",
127
+ " <td>0.06</td>\n",
128
+ " <td>6572.099162</td>\n",
129
+ " <td>28.0</td>\n",
130
+ " <td>...</td>\n",
131
+ " <td>NaN</td>\n",
132
+ " <td>NaN</td>\n",
133
+ " <td>NaN</td>\n",
134
+ " <td>75.3</td>\n",
135
+ " <td>NaN</td>\n",
136
+ " <td>NaN</td>\n",
137
+ " <td>NaN</td>\n",
138
+ " <td>NaN</td>\n",
139
+ " <td>NaN</td>\n",
140
+ " <td>NaN</td>\n",
141
+ " </tr>\n",
142
+ " <tr>\n",
143
+ " <th>2</th>\n",
144
+ " <td>2018-01-01 00:02:00</td>\n",
145
+ " <td>100.0</td>\n",
146
+ " <td>69.0</td>\n",
147
+ " <td>67.5</td>\n",
148
+ " <td>20.0</td>\n",
149
+ " <td>9708.240</td>\n",
150
+ " <td>66.1</td>\n",
151
+ " <td>0.06</td>\n",
152
+ " <td>7628.832542</td>\n",
153
+ " <td>28.0</td>\n",
154
+ " <td>...</td>\n",
155
+ " <td>NaN</td>\n",
156
+ " <td>NaN</td>\n",
157
+ " <td>NaN</td>\n",
158
+ " <td>75.3</td>\n",
159
+ " <td>NaN</td>\n",
160
+ " <td>NaN</td>\n",
161
+ " <td>NaN</td>\n",
162
+ " <td>NaN</td>\n",
163
+ " <td>NaN</td>\n",
164
+ " <td>NaN</td>\n",
165
+ " </tr>\n",
166
+ " <tr>\n",
167
+ " <th>3</th>\n",
168
+ " <td>2018-01-01 00:03:00</td>\n",
169
+ " <td>100.0</td>\n",
170
+ " <td>69.0</td>\n",
171
+ " <td>67.5</td>\n",
172
+ " <td>20.0</td>\n",
173
+ " <td>9611.638</td>\n",
174
+ " <td>66.1</td>\n",
175
+ " <td>0.06</td>\n",
176
+ " <td>7710.294617</td>\n",
177
+ " <td>28.0</td>\n",
178
+ " <td>...</td>\n",
179
+ " <td>NaN</td>\n",
180
+ " <td>NaN</td>\n",
181
+ " <td>NaN</td>\n",
182
+ " <td>75.3</td>\n",
183
+ " <td>NaN</td>\n",
184
+ " <td>NaN</td>\n",
185
+ " <td>NaN</td>\n",
186
+ " <td>NaN</td>\n",
187
+ " <td>NaN</td>\n",
188
+ " <td>NaN</td>\n",
189
+ " </tr>\n",
190
+ " <tr>\n",
191
+ " <th>4</th>\n",
192
+ " <td>2018-01-01 00:04:00</td>\n",
193
+ " <td>100.0</td>\n",
194
+ " <td>69.0</td>\n",
195
+ " <td>67.5</td>\n",
196
+ " <td>20.0</td>\n",
197
+ " <td>9215.110</td>\n",
198
+ " <td>66.0</td>\n",
199
+ " <td>0.06</td>\n",
200
+ " <td>7139.184090</td>\n",
201
+ " <td>28.0</td>\n",
202
+ " <td>...</td>\n",
203
+ " <td>NaN</td>\n",
204
+ " <td>NaN</td>\n",
205
+ " <td>NaN</td>\n",
206
+ " <td>75.3</td>\n",
207
+ " <td>NaN</td>\n",
208
+ " <td>NaN</td>\n",
209
+ " <td>NaN</td>\n",
210
+ " <td>NaN</td>\n",
211
+ " <td>NaN</td>\n",
212
+ " <td>NaN</td>\n",
213
+ " </tr>\n",
214
+ " <tr>\n",
215
+ " <th>...</th>\n",
216
+ " <td>...</td>\n",
217
+ " <td>...</td>\n",
218
+ " <td>...</td>\n",
219
+ " <td>...</td>\n",
220
+ " <td>...</td>\n",
221
+ " <td>...</td>\n",
222
+ " <td>...</td>\n",
223
+ " <td>...</td>\n",
224
+ " <td>...</td>\n",
225
+ " <td>...</td>\n",
226
+ " <td>...</td>\n",
227
+ " <td>...</td>\n",
228
+ " <td>...</td>\n",
229
+ " <td>...</td>\n",
230
+ " <td>...</td>\n",
231
+ " <td>...</td>\n",
232
+ " <td>...</td>\n",
233
+ " <td>...</td>\n",
234
+ " <td>...</td>\n",
235
+ " <td>...</td>\n",
236
+ " <td>...</td>\n",
237
+ " </tr>\n",
238
+ " <tr>\n",
239
+ " <th>2072149</th>\n",
240
+ " <td>2020-12-31 23:58:00</td>\n",
241
+ " <td>100.0</td>\n",
242
+ " <td>68.0</td>\n",
243
+ " <td>63.2</td>\n",
244
+ " <td>20.0</td>\n",
245
+ " <td>18884.834</td>\n",
246
+ " <td>64.4</td>\n",
247
+ " <td>0.06</td>\n",
248
+ " <td>2938.320000</td>\n",
249
+ " <td>23.4</td>\n",
250
+ " <td>...</td>\n",
251
+ " <td>71.0</td>\n",
252
+ " <td>69.0</td>\n",
253
+ " <td>23.145000</td>\n",
254
+ " <td>123.8</td>\n",
255
+ " <td>56.25</td>\n",
256
+ " <td>54.71</td>\n",
257
+ " <td>56.4</td>\n",
258
+ " <td>123.42</td>\n",
259
+ " <td>61.6</td>\n",
260
+ " <td>122.36</td>\n",
261
+ " </tr>\n",
262
+ " <tr>\n",
263
+ " <th>2072150</th>\n",
264
+ " <td>2020-12-31 23:58:00</td>\n",
265
+ " <td>100.0</td>\n",
266
+ " <td>68.0</td>\n",
267
+ " <td>63.2</td>\n",
268
+ " <td>20.0</td>\n",
269
+ " <td>18884.834</td>\n",
270
+ " <td>64.4</td>\n",
271
+ " <td>0.06</td>\n",
272
+ " <td>2938.320000</td>\n",
273
+ " <td>23.4</td>\n",
274
+ " <td>...</td>\n",
275
+ " <td>71.0</td>\n",
276
+ " <td>69.0</td>\n",
277
+ " <td>23.145000</td>\n",
278
+ " <td>123.8</td>\n",
279
+ " <td>56.25</td>\n",
280
+ " <td>54.71</td>\n",
281
+ " <td>56.4</td>\n",
282
+ " <td>123.42</td>\n",
283
+ " <td>61.6</td>\n",
284
+ " <td>122.36</td>\n",
285
+ " </tr>\n",
286
+ " <tr>\n",
287
+ " <th>2072151</th>\n",
288
+ " <td>2020-12-31 23:59:00</td>\n",
289
+ " <td>100.0</td>\n",
290
+ " <td>68.0</td>\n",
291
+ " <td>63.2</td>\n",
292
+ " <td>20.0</td>\n",
293
+ " <td>19345.508</td>\n",
294
+ " <td>64.3</td>\n",
295
+ " <td>0.06</td>\n",
296
+ " <td>3154.390000</td>\n",
297
+ " <td>23.4</td>\n",
298
+ " <td>...</td>\n",
299
+ " <td>71.0</td>\n",
300
+ " <td>69.0</td>\n",
301
+ " <td>23.145000</td>\n",
302
+ " <td>123.8</td>\n",
303
+ " <td>56.25</td>\n",
304
+ " <td>54.71</td>\n",
305
+ " <td>56.4</td>\n",
306
+ " <td>123.42</td>\n",
307
+ " <td>61.6</td>\n",
308
+ " <td>122.36</td>\n",
309
+ " </tr>\n",
310
+ " <tr>\n",
311
+ " <th>2072152</th>\n",
312
+ " <td>2020-12-31 23:59:00</td>\n",
313
+ " <td>100.0</td>\n",
314
+ " <td>68.0</td>\n",
315
+ " <td>63.2</td>\n",
316
+ " <td>20.0</td>\n",
317
+ " <td>19345.508</td>\n",
318
+ " <td>64.3</td>\n",
319
+ " <td>0.06</td>\n",
320
+ " <td>3154.390000</td>\n",
321
+ " <td>23.4</td>\n",
322
+ " <td>...</td>\n",
323
+ " <td>71.0</td>\n",
324
+ " <td>69.0</td>\n",
325
+ " <td>23.145000</td>\n",
326
+ " <td>123.8</td>\n",
327
+ " <td>56.25</td>\n",
328
+ " <td>54.71</td>\n",
329
+ " <td>56.4</td>\n",
330
+ " <td>123.42</td>\n",
331
+ " <td>61.6</td>\n",
332
+ " <td>122.36</td>\n",
333
+ " </tr>\n",
334
+ " <tr>\n",
335
+ " <th>2072153</th>\n",
336
+ " <td>2021-01-01 00:00:00</td>\n",
337
+ " <td>100.0</td>\n",
338
+ " <td>68.0</td>\n",
339
+ " <td>63.2</td>\n",
340
+ " <td>20.0</td>\n",
341
+ " <td>18650.232</td>\n",
342
+ " <td>64.1</td>\n",
343
+ " <td>0.06</td>\n",
344
+ " <td>3076.270000</td>\n",
345
+ " <td>22.9</td>\n",
346
+ " <td>...</td>\n",
347
+ " <td>71.0</td>\n",
348
+ " <td>69.0</td>\n",
349
+ " <td>23.788947</td>\n",
350
+ " <td>123.8</td>\n",
351
+ " <td>56.25</td>\n",
352
+ " <td>54.71</td>\n",
353
+ " <td>56.4</td>\n",
354
+ " <td>123.42</td>\n",
355
+ " <td>61.6</td>\n",
356
+ " <td>122.36</td>\n",
357
+ " </tr>\n",
358
+ " </tbody>\n",
359
+ "</table>\n",
360
+ "<p>2072154 rows × 30 columns</p>\n",
361
+ "</div>"
362
+ ],
363
+ "text/plain": [
364
+ " date zone_047_hw_valve rtu_004_sat_sp_tn \\\n",
365
+ "0 2018-01-01 00:00:00 100.0 69.0 \n",
366
+ "1 2018-01-01 00:01:00 100.0 69.0 \n",
367
+ "2 2018-01-01 00:02:00 100.0 69.0 \n",
368
+ "3 2018-01-01 00:03:00 100.0 69.0 \n",
369
+ "4 2018-01-01 00:04:00 100.0 69.0 \n",
370
+ "... ... ... ... \n",
371
+ "2072149 2020-12-31 23:58:00 100.0 68.0 \n",
372
+ "2072150 2020-12-31 23:58:00 100.0 68.0 \n",
373
+ "2072151 2020-12-31 23:59:00 100.0 68.0 \n",
374
+ "2072152 2020-12-31 23:59:00 100.0 68.0 \n",
375
+ "2072153 2021-01-01 00:00:00 100.0 68.0 \n",
376
+ "\n",
377
+ " zone_047_temp zone_047_fan_spd rtu_004_fltrd_sa_flow_tn \\\n",
378
+ "0 67.5 20.0 9265.604 \n",
379
+ "1 67.5 20.0 9265.604 \n",
380
+ "2 67.5 20.0 9708.240 \n",
381
+ "3 67.5 20.0 9611.638 \n",
382
+ "4 67.5 20.0 9215.110 \n",
383
+ "... ... ... ... \n",
384
+ "2072149 63.2 20.0 18884.834 \n",
385
+ "2072150 63.2 20.0 18884.834 \n",
386
+ "2072151 63.2 20.0 19345.508 \n",
387
+ "2072152 63.2 20.0 19345.508 \n",
388
+ "2072153 63.2 20.0 18650.232 \n",
389
+ "\n",
390
+ " rtu_004_sa_temp rtu_004_pa_static_stpt_tn rtu_004_oa_flow_tn \\\n",
391
+ "0 66.1 0.06 0.000000 \n",
392
+ "1 66.0 0.06 6572.099162 \n",
393
+ "2 66.1 0.06 7628.832542 \n",
394
+ "3 66.1 0.06 7710.294617 \n",
395
+ "4 66.0 0.06 7139.184090 \n",
396
+ "... ... ... ... \n",
397
+ "2072149 64.4 0.06 2938.320000 \n",
398
+ "2072150 64.4 0.06 2938.320000 \n",
399
+ "2072151 64.3 0.06 3154.390000 \n",
400
+ "2072152 64.3 0.06 3154.390000 \n",
401
+ "2072153 64.1 0.06 3076.270000 \n",
402
+ "\n",
403
+ " rtu_004_oadmpr_pct ... zone_047_heating_sp Unnamed: 47_y \\\n",
404
+ "0 28.0 ... NaN NaN \n",
405
+ "1 28.0 ... NaN NaN \n",
406
+ "2 28.0 ... NaN NaN \n",
407
+ "3 28.0 ... NaN NaN \n",
408
+ "4 28.0 ... NaN NaN \n",
409
+ "... ... ... ... ... \n",
410
+ "2072149 23.4 ... 71.0 69.0 \n",
411
+ "2072150 23.4 ... 71.0 69.0 \n",
412
+ "2072151 23.4 ... 71.0 69.0 \n",
413
+ "2072152 23.4 ... 71.0 69.0 \n",
414
+ "2072153 22.9 ... 71.0 69.0 \n",
415
+ "\n",
416
+ " hvac_S hp_hws_temp aru_001_cwr_temp aru_001_cws_fr_gpm \\\n",
417
+ "0 NaN 75.3 NaN NaN \n",
418
+ "1 NaN 75.3 NaN NaN \n",
419
+ "2 NaN 75.3 NaN NaN \n",
420
+ "3 NaN 75.3 NaN NaN \n",
421
+ "4 NaN 75.3 NaN NaN \n",
422
+ "... ... ... ... ... \n",
423
+ "2072149 23.145000 123.8 56.25 54.71 \n",
424
+ "2072150 23.145000 123.8 56.25 54.71 \n",
425
+ "2072151 23.145000 123.8 56.25 54.71 \n",
426
+ "2072152 23.145000 123.8 56.25 54.71 \n",
427
+ "2072153 23.788947 123.8 56.25 54.71 \n",
428
+ "\n",
429
+ " aru_001_cws_temp aru_001_hwr_temp aru_001_hws_fr_gpm \\\n",
430
+ "0 NaN NaN NaN \n",
431
+ "1 NaN NaN NaN \n",
432
+ "2 NaN NaN NaN \n",
433
+ "3 NaN NaN NaN \n",
434
+ "4 NaN NaN NaN \n",
435
+ "... ... ... ... \n",
436
+ "2072149 56.4 123.42 61.6 \n",
437
+ "2072150 56.4 123.42 61.6 \n",
438
+ "2072151 56.4 123.42 61.6 \n",
439
+ "2072152 56.4 123.42 61.6 \n",
440
+ "2072153 56.4 123.42 61.6 \n",
441
+ "\n",
442
+ " aru_001_hws_temp \n",
443
+ "0 NaN \n",
444
+ "1 NaN \n",
445
+ "2 NaN \n",
446
+ "3 NaN \n",
447
+ "4 NaN \n",
448
+ "... ... \n",
449
+ "2072149 122.36 \n",
450
+ "2072150 122.36 \n",
451
+ "2072151 122.36 \n",
452
+ "2072152 122.36 \n",
453
+ "2072153 122.36 \n",
454
+ "\n",
455
+ "[2072154 rows x 30 columns]"
456
+ ]
457
+ },
458
+ "execution_count": 58,
459
+ "metadata": {},
460
+ "output_type": "execute_result"
461
+ }
462
+ ],
463
+ "source": [
464
+ "merged = pd.read_csv(r'../data/long_merge.csv')\n",
465
+ "\n",
466
+ "zone = \"47\"\n",
467
+ "\n",
468
+ "if zone in [\"36\", \"37\", \"38\", \"39\", \"40\", \"41\", \"42\", \"64\", \"65\", \"66\", \"67\", \"68\", \"69\", \"70\"]:\n",
469
+ " rtu = \"rtu_001\"\n",
470
+ " wing = \"hvac_N\"\n",
471
+ "elif zone in [\"18\", \"25\", \"26\", \"45\", \"48\", \"55\", \"56\", \"61\"]:\n",
472
+ " rtu = \"rtu_003\"\n",
473
+ " wing = \"hvac_S\"\n",
474
+ "elif zone in [\"16\", \"17\", \"21\", \"22\", \"23\", \"24\", \"46\", \"47\", \"51\", \"52\", \"53\", \"54\"]:\n",
475
+ " rtu = \"rtu_004\"\n",
476
+ " wing = \"hvac_S\"\n",
477
+ "else:\n",
478
+ " rtu = \"rtu_002\"\n",
479
+ " wing = \"hvac_N\"\n",
480
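+ "# From the full `merged` DataFrame keep the date, every column matching the selected zone, RTU or wing, and the plant-level hp/aru sensor columns\n",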
481
+ "sorted = merged[[\"date\"]+[col for col in merged.columns if zone in col or rtu in col or wing in col]+[\"hp_hws_temp\", \"aru_001_cwr_temp\" , \"aru_001_cws_fr_gpm\" ,\"aru_001_cws_temp\",\"aru_001_hwr_temp\" ,\"aru_001_hws_fr_gpm\" ,\"aru_001_hws_temp\"]]\n",
482
+ "sorted"
483
+ ]
484
+ },
485
+ {
486
+ "cell_type": "code",
487
+ "execution_count": 59,
488
+ "metadata": {},
489
+ "outputs": [
490
+ {
491
+ "data": {
492
+ "text/html": [
493
+ "<div>\n",
494
+ "<style scoped>\n",
495
+ " .dataframe tbody tr th:only-of-type {\n",
496
+ " vertical-align: middle;\n",
497
+ " }\n",
498
+ "\n",
499
+ " .dataframe tbody tr th {\n",
500
+ " vertical-align: top;\n",
501
+ " }\n",
502
+ "\n",
503
+ " .dataframe thead th {\n",
504
+ " text-align: right;\n",
505
+ " }\n",
506
+ "</style>\n",
507
+ "<table border=\"1\" class=\"dataframe\">\n",
508
+ " <thead>\n",
509
+ " <tr style=\"text-align: right;\">\n",
510
+ " <th></th>\n",
511
+ " <th>date</th>\n",
512
+ " <th>hp_hws_temp</th>\n",
513
+ " <th>rtu_003_sat_sp_tn</th>\n",
514
+ " <th>rtu_003_fltrd_sa_flow_tn</th>\n",
515
+ " <th>rtu_003_sa_temp</th>\n",
516
+ " <th>rtu_003_pa_static_stpt_tn</th>\n",
517
+ " <th>rtu_003_oa_flow_tn</th>\n",
518
+ " <th>rtu_003_oadmpr_pct</th>\n",
519
+ " <th>rtu_003_econ_stpt_tn</th>\n",
520
+ " <th>rtu_003_ra_temp</th>\n",
521
+ " <th>...</th>\n",
522
+ " <th>rtu_003_rf_vfd_spd_fbk_tn</th>\n",
523
+ " <th>rtu_003_fltrd_gnd_lvl_plenum_press_tn</th>\n",
524
+ " <th>rtu_003_fltrd_lvl2_plenum_press_tn</th>\n",
525
+ " <th>wifi_third_south</th>\n",
526
+ " <th>wifi_fourth_south</th>\n",
527
+ " <th>air_temp_set_1</th>\n",
528
+ " <th>air_temp_set_2</th>\n",
529
+ " <th>dew_point_temperature_set_1d</th>\n",
530
+ " <th>relative_humidity_set_1</th>\n",
531
+ " <th>solar_radiation_set_1</th>\n",
532
+ " </tr>\n",
533
+ " </thead>\n",
534
+ " <tbody>\n",
535
+ " <tr>\n",
536
+ " <th>0</th>\n",
537
+ " <td>2018-01-01 00:00:00</td>\n",
538
+ " <td>75.3</td>\n",
539
+ " <td>65.0</td>\n",
540
+ " <td>13558.539</td>\n",
541
+ " <td>65.5</td>\n",
542
+ " <td>0.6</td>\n",
543
+ " <td>0.000000</td>\n",
544
+ " <td>34.6</td>\n",
545
+ " <td>65.0</td>\n",
546
+ " <td>67.9</td>\n",
547
+ " <td>...</td>\n",
548
+ " <td>49.9</td>\n",
549
+ " <td>0.04</td>\n",
550
+ " <td>0.05</td>\n",
551
+ " <td>NaN</td>\n",
552
+ " <td>NaN</td>\n",
553
+ " <td>11.64</td>\n",
554
+ " <td>11.51</td>\n",
555
+ " <td>8.1</td>\n",
556
+ " <td>79.07</td>\n",
557
+ " <td>86.7</td>\n",
558
+ " </tr>\n",
559
+ " <tr>\n",
560
+ " <th>1</th>\n",
561
+ " <td>2018-01-01 00:01:00</td>\n",
562
+ " <td>75.3</td>\n",
563
+ " <td>65.0</td>\n",
564
+ " <td>13592.909</td>\n",
565
+ " <td>65.6</td>\n",
566
+ " <td>0.6</td>\n",
567
+ " <td>5992.059572</td>\n",
568
+ " <td>34.6</td>\n",
569
+ " <td>65.0</td>\n",
570
+ " <td>67.9</td>\n",
571
+ " <td>...</td>\n",
572
+ " <td>49.4</td>\n",
573
+ " <td>0.04</td>\n",
574
+ " <td>0.04</td>\n",
575
+ " <td>NaN</td>\n",
576
+ " <td>NaN</td>\n",
577
+ " <td>11.64</td>\n",
578
+ " <td>11.51</td>\n",
579
+ " <td>8.1</td>\n",
580
+ " <td>79.07</td>\n",
581
+ " <td>86.7</td>\n",
582
+ " </tr>\n",
583
+ " </tbody>\n",
584
+ "</table>\n",
585
+ "<p>2 rows × 23 columns</p>\n",
586
+ "</div>"
587
+ ],
588
+ "text/plain": [
589
+ " date hp_hws_temp rtu_003_sat_sp_tn \\\n",
590
+ "0 2018-01-01 00:00:00 75.3 65.0 \n",
591
+ "1 2018-01-01 00:01:00 75.3 65.0 \n",
592
+ "\n",
593
+ " rtu_003_fltrd_sa_flow_tn rtu_003_sa_temp rtu_003_pa_static_stpt_tn \\\n",
594
+ "0 13558.539 65.5 0.6 \n",
595
+ "1 13592.909 65.6 0.6 \n",
596
+ "\n",
597
+ " rtu_003_oa_flow_tn rtu_003_oadmpr_pct rtu_003_econ_stpt_tn \\\n",
598
+ "0 0.000000 34.6 65.0 \n",
599
+ "1 5992.059572 34.6 65.0 \n",
600
+ "\n",
601
+ " rtu_003_ra_temp ... rtu_003_rf_vfd_spd_fbk_tn \\\n",
602
+ "0 67.9 ... 49.9 \n",
603
+ "1 67.9 ... 49.4 \n",
604
+ "\n",
605
+ " rtu_003_fltrd_gnd_lvl_plenum_press_tn rtu_003_fltrd_lvl2_plenum_press_tn \\\n",
606
+ "0 0.04 0.05 \n",
607
+ "1 0.04 0.04 \n",
608
+ "\n",
609
+ " wifi_third_south wifi_fourth_south air_temp_set_1 air_temp_set_2 \\\n",
610
+ "0 NaN NaN 11.64 11.51 \n",
611
+ "1 NaN NaN 11.64 11.51 \n",
612
+ "\n",
613
+ " dew_point_temperature_set_1d relative_humidity_set_1 \\\n",
614
+ "0 8.1 79.07 \n",
615
+ "1 8.1 79.07 \n",
616
+ "\n",
617
+ " solar_radiation_set_1 \n",
618
+ "0 86.7 \n",
619
+ "1 86.7 \n",
620
+ "\n",
621
+ "[2 rows x 23 columns]"
622
+ ]
623
+ },
624
+ "execution_count": 59,
625
+ "metadata": {},
626
+ "output_type": "execute_result"
627
+ }
628
+ ],
629
+ "source": [
630
+ "rtu = [\"rtu_003\"]\n",
631
+ "# wing = [\"hvac_N\",\"hvac_S\"]\n",
632
+ "env = [\"air_temp_set_1\",\"air_temp_set_2\",\"dew_point_temperature_set_1d\",\"relative_humidity_set_1\",\"solar_radiation_set_1\"]\n",
633
+ "wifi=[\"wifi_third_south\",\"wifi_fourth_south\"]\n",
634
+ "[\"rtu_003_ma_temp\",]\n",
635
+ "# any(sub in col for sub in zone) or\n",
636
+ "energy_data = merged[[\"date\",\"hp_hws_temp\"]+[col for col in merged.columns if \n",
637
+ " any(sub in col for sub in rtu) or any(sub in col for sub in wifi)]+env]\n",
638
+ "df_filtered = energy_data[[col for col in energy_data.columns if 'Unnamed' not in col]]\n",
639
+ "df_filtered = df_filtered[[col for col in df_filtered.columns if 'co2' not in col]]\n",
640
+ "df_filtered = df_filtered[[col for col in df_filtered.columns if 'templogger' not in col]]\n",
641
+ "# df_filtered = df_filtered.dropna()\n",
642
+ "df_filtered.head(2)"
643
+ ]
644
+ },
645
+ {
646
+ "cell_type": "code",
647
+ "execution_count": 60,
648
+ "metadata": {},
649
+ "outputs": [],
650
+ "source": [
651
+ "df_filtered['date'] = pd.to_datetime(df_filtered['date'], format = \"%Y-%m-%d %H:%M:%S\")\n",
652
+ "df_filtered = df_filtered[ (df_filtered.date.dt.date >date(2018, 1, 1)) & (df_filtered.date.dt.date< date(2021, 1, 1))]\n",
653
+ "# df_filtered.isna().sum()\n",
654
+ "df_filtered = df_filtered.ffill()\n",
655
+ "df_filtered = df_filtered.bfill()\n",
656
+ "if df_filtered.isna().any().any():\n",
657
+ " print(\"There are NA values in the DataFrame columns.\")"
658
+ ]
659
+ },
660
+ {
661
+ "cell_type": "code",
662
+ "execution_count": 61,
663
+ "metadata": {},
664
+ "outputs": [],
665
+ "source": [
666
+ "df_filtered = df_filtered.loc[:,['date','hp_hws_temp',\n",
667
+ " 'rtu_003_sa_temp',\n",
668
+ " 'rtu_003_oadmpr_pct',\n",
669
+ " 'rtu_003_ra_temp',\n",
670
+ " 'rtu_003_oa_temp',\n",
671
+ " 'rtu_003_ma_temp',\n",
672
+ " 'rtu_003_sf_vfd_spd_fbk_tn',\n",
673
+ " 'rtu_003_rf_vfd_spd_fbk_tn','wifi_third_south',\n",
674
+ " 'wifi_fourth_south',\n",
675
+ " 'air_temp_set_1',\n",
676
+ " 'air_temp_set_2',\n",
677
+ " 'dew_point_temperature_set_1d',\n",
678
+ " 'relative_humidity_set_1',\n",
679
+ " 'solar_radiation_set_1']]"
680
+ ]
681
+ },
682
+ {
683
+ "cell_type": "code",
684
+ "execution_count": 62,
685
+ "metadata": {},
686
+ "outputs": [
687
+ {
688
+ "data": {
689
+ "text/plain": [
690
+ "[]"
691
+ ]
692
+ },
693
+ "execution_count": 62,
694
+ "metadata": {},
695
+ "output_type": "execute_result"
696
+ }
697
+ ],
698
+ "source": [
699
+ "testdataset_df = df_filtered[(df_filtered.date.dt.date >date(2020, 3, 11))]\n",
700
+ "\n",
701
+ "# traindataset_df = df_filtered[ (df_filtered.date.dt.date >date(2019, 11, 8))]\n",
702
+ "\n",
703
+ "traindataset_df = df_filtered[ (df_filtered.date.dt.date <date(2020, 3, 11))]\n",
704
+ "testdataset = testdataset_df.drop(columns=[\"date\"]).rolling(window = 10, step=5, min_periods=1).mean().values\n",
705
+ "\n",
706
+ "traindataset = traindataset_df.drop(columns=[\"date\"]).rolling(window = 10, step=5, min_periods=1).mean().values\n",
707
+ "\n",
708
+ "columns_with_na = traindataset_df.columns[traindataset_df.isna().any()].tolist()\n",
709
+ "columns_with_na"
710
+ ]
711
+ },
712
+ {
713
+ "cell_type": "code",
714
+ "execution_count": 63,
715
+ "metadata": {},
716
+ "outputs": [
717
+ {
718
+ "data": {
719
+ "text/plain": [
720
+ "(1157787, 909910)"
721
+ ]
722
+ },
723
+ "execution_count": 63,
724
+ "metadata": {},
725
+ "output_type": "execute_result"
726
+ }
727
+ ],
728
+ "source": [
729
+ "len(traindataset_df), len(testdataset_df)"
730
+ ]
731
+ },
732
+ {
733
+ "cell_type": "code",
734
+ "execution_count": 64,
735
+ "metadata": {},
736
+ "outputs": [],
737
+ "source": [
738
+ "traindataset = traindataset.astype('float32')\n",
739
+ "testdataset = testdataset.astype('float32')\n",
740
+ "\n",
741
+ "scaler = StandardScaler()\n",
742
+ "traindataset = scaler.fit_transform(traindataset)\n",
743
+ "testdataset = scaler.transform(testdataset)"
744
+ ]
745
+ },
746
+ {
747
+ "cell_type": "code",
748
+ "execution_count": 65,
749
+ "metadata": {},
750
+ "outputs": [],
751
+ "source": [
752
+ "train,test = traindataset,testdataset\n",
753
+ "\n",
754
+ "def create_dataset(dataset,time_step):\n",
755
+ " x = [[] for _ in range(15)] \n",
756
+ " Y = []\n",
757
+ " for i in range(len(dataset) - time_step - 1):\n",
758
+ " for j in range(15):\n",
759
+ " x[j].append(dataset[i:(i + time_step), j])\n",
760
+ " Y.append([dataset[i + time_step, 0],dataset[i + time_step, 1],dataset[i + time_step, 2],dataset[i + time_step, 3],dataset[i + time_step, 4],dataset[i + time_step, 5],\n",
761
+ " dataset[i + time_step, 6],dataset[i + time_step, 7]])\n",
762
+ " x= [np.array(feature_list) for feature_list in x]\n",
763
+ " Y = np.reshape(Y,(len(Y),8))\n",
764
+ " return np.stack(x,axis=2),Y\n",
765
+ "\n",
766
+ "time_step = 30\n",
767
+ "X_train, y_train = create_dataset(train, time_step)\n",
768
+ "X_test, y_test = create_dataset(test, time_step)\n",
769
+ "\n",
770
+ "\n",
771
+ "model = Sequential()\n",
772
+ "model.add(LSTM(units=50, return_sequences=True, input_shape=(X_train.shape[1], X_train.shape[2])))\n",
773
+ "model.add(LSTM(units=50, return_sequences=True))\n",
774
+ "model.add(LSTM(units=30))\n",
775
+ "model.add(Dense(units=8))\n",
776
+ "\n",
777
+ "model.compile(optimizer='adam', loss='mean_squared_error')\n",
778
+ "\n",
779
+ "checkpoint_path = \"lstm_smooth_01.tf\"\n",
780
+ "checkpoint_callback = ModelCheckpoint(filepath=checkpoint_path, monitor='val_loss', verbose=1, save_best_only=True, mode='min')\n",
781
+ "model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=5, batch_size=64, verbose=1, callbacks=[checkpoint_callback])"
782
+ ]
783
+ },
784
+ {
785
+ "cell_type": "code",
786
+ "execution_count": null,
787
+ "metadata": {},
788
+ "outputs": [
789
+ {
790
+ "data": {
791
+ "text/plain": [
792
+ "<tensorflow.python.checkpoint.checkpoint.CheckpointLoadStatus at 0x1b2861bd190>"
793
+ ]
794
+ },
795
+ "execution_count": 11,
796
+ "metadata": {},
797
+ "output_type": "execute_result"
798
+ }
799
+ ],
800
+ "source": [
801
+ "model.load_weights(checkpoint_path)"
802
+ ]
803
+ },
804
+ {
805
+ "cell_type": "code",
806
+ "execution_count": null,
807
+ "metadata": {},
808
+ "outputs": [
809
+ {
810
+ "name": "stdout",
811
+ "output_type": "stream",
812
+ "text": [
813
+ "5686/5686 [==============================] - 27s 5ms/step\n"
814
+ ]
815
+ }
816
+ ],
817
+ "source": [
818
+ "test_predict1 = model.predict(X_test)"
819
+ ]
820
+ },
821
+ {
822
+ "cell_type": "code",
823
+ "execution_count": null,
824
+ "metadata": {},
825
+ "outputs": [],
826
+ "source": [
827
+ "%matplotlib qt\n",
828
+ "var = 3\n",
829
+ "plt.plot(y_test[:,var], label='Original Testing Data', color='blue')\n",
830
+ "plt.plot(test_predict1[:,var], label='Predicted Testing Data', color='red',alpha=0.8)\n",
831
+ "anomalies = np.where(abs(test_predict1[:,var] - y_test[:,var]) > 0.38)\n",
832
+ "plt.scatter(anomalies,test_predict1[anomalies,var], color='black',marker =\"o\",s=100 )\n",
833
+ "\n",
834
+ "\n",
835
+ "plt.title('Testing Data - Predicted vs Actual')\n",
836
+ "plt.xlabel('Time')\n",
837
+ "plt.ylabel('Value')\n",
838
+ "plt.legend()\n",
839
+ "plt.show()"
840
+ ]
841
+ },
842
+ {
843
+ "cell_type": "code",
844
+ "execution_count": null,
845
+ "metadata": {},
846
+ "outputs": [],
847
+ "source": [
848
+ "from sklearn.mixture import GaussianMixture\n",
849
+ "import numpy as np\n",
850
+ "import matplotlib.pyplot as plt\n",
851
+ "from sklearn.decomposition import PCA\n",
852
+ "\n",
853
+ "# Seed NumPy's global RNG, then use the prediction residuals (predicted - actual) as the clustering input\n",
854
+ "np.random.seed(0)\n",
855
+ "X = test_predict1 - y_test\n",
856
+ "\n",
857
+ "\n",
858
+ "pca = PCA(n_components=2)\n",
859
+ "X = pca.fit_transform(X)\n",
860
+ "\n",
861
+ "\n",
862
+ "# Creating the GMM instance with desired number of clusters\n",
863
+ "gmm = GaussianMixture(n_components=2)\n",
864
+ "\n",
865
+ "# Fitting the model to the data\n",
866
+ "gmm.fit(X)\n",
867
+ "\n",
868
+ "# Getting the cluster labels\n",
869
+ "labels = gmm.predict(X)\n",
870
+ "\n",
871
+ "# Plotting the data points with colors representing different clusters\n",
872
+ "plt.scatter(X[:, 0], X[:, 1], c=labels, cmap='viridis', alpha=0.5)\n",
873
+ "plt.title('GMM Clustering')\n",
874
+ "plt.xlabel('Feature 1')\n",
875
+ "plt.ylabel('Feature 2')\n",
876
+ "plt.show()\n"
877
+ ]
878
+ },
879
+ {
880
+ "cell_type": "code",
881
+ "execution_count": null,
882
+ "metadata": {},
883
+ "outputs": [],
884
+ "source": [
885
+ "from sklearn.cluster import KMeans\n",
886
+ "import numpy as np\n",
887
+ "import matplotlib.pyplot as plt\n",
888
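+ "# Seed NumPy's global RNG, then rescale the prediction residuals as the clustering input (NOTE(review): StandardScaler un-scaling normally uses scaler.scale_, not scaler.var_ -- confirm)\n",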
889
+ "np.random.seed(0)\n",
890
+ "X = (test_predict1 - y_test) * scaler.var_[0:8] + scaler.mean_[0:8]\n",
891
+ "\n",
892
+ "k = 6\n",
893
+ "\n",
894
+ "kmeans = KMeans(n_clusters=k)\n",
895
+ "\n",
896
+ "kmeans.fit(X)\n",
897
+ "\n",
898
+ "\n",
899
+ "pca = PCA(n_components=2)\n",
900
+ "X = pca.fit_transform(X)\n",
901
+ "\n",
902
+ "\n",
903
+ "\n",
904
+ "# Getting the cluster centers and labels\n",
905
+ "centroids = kmeans.cluster_centers_\n",
906
+ "centroids = pca.transform(centroids)\n",
907
+ "labels = kmeans.labels_\n",
908
+ "\n",
909
+ "# Plotting the data points and cluster centers\n",
910
+ "plt.scatter(X[:, 0], X[:, 1], c=labels, cmap='viridis', alpha=0.5)\n",
911
+ "plt.scatter(centroids[:, 0], centroids[:, 1], marker='x', c='red', s=200, linewidths=2)\n",
912
+ "plt.title('KMeans Clustering')\n",
913
+ "plt.xlabel('Feature 1')\n",
914
+ "plt.ylabel('Feature 2')\n",
915
+ "plt.show()\n"
916
+ ]
917
+ },
918
+ {
919
+ "cell_type": "code",
920
+ "execution_count": null,
921
+ "metadata": {},
922
+ "outputs": [],
923
+ "source": [
924
+ "k = 60\n",
925
+ "X= test_predict1 - y_test\n",
926
+ "processed_data = []\n",
927
+ "feat_df = pd.DataFrame(columns=[\"mean\",\"std\",])\n",
928
+ "for i in range(0,len(X), 60):\n",
929
+ " mean = X[i:i+k].mean(axis = 0)\n",
930
+ " std = X[i:i+k].std(axis = 0)\n",
931
+ " max = X[i:i+k].max(axis = 0)\n",
932
+ " min = X[i:i+k].min(axis = 0)\n",
933
+ " iqr = np.percentile(X[i:i+k], 75, axis=0) - np.percentile(X[i:i+k], 25,axis=0)\n",
934
+ " data = np.concatenate([mean, std, max, min, iqr])\n",
935
+ " processed_data.append([data])\n",
936
+ "processed_data = np.concatenate(processed_data,axis=0) "
937
+ ]
938
+ },
939
+ {
940
+ "cell_type": "code",
941
+ "execution_count": null,
942
+ "metadata": {},
943
+ "outputs": [],
944
+ "source": [
945
+ "X = processed_data\n",
946
+ "# Cluster the per-window statistics; a fixed random_state keeps labels and colours stable across reruns\n",
947
+ "kmeans = KMeans(n_clusters=3, algorithm='elkan', max_iter=1000, n_init=5, random_state=0)\n",
948
+ "\n",
949
+ "kmeans.fit(X)\n",
950
+ "# Project to 2-D for plotting only; the clustering above ran on the unprojected X\n",
951
+ "pca = PCA(n_components=2)\n",
952
+ "X = pca.fit_transform(X)\n",
953
+ "\n",
954
+ "\n",
955
+ "# Cluster centers pushed through the same PCA projection, plus the per-row labels\n",
956
+ "centroids = kmeans.cluster_centers_\n",
957
+ "centroids = pca.transform(centroids)\n",
958
+ "labels = kmeans.labels_\n",
959
+ "\n",
960
+ "# Plotting the data points and cluster centers\n",
961
+ "plt.scatter(X[:, 0], X[:, 1], c=labels, cmap='viridis', alpha=0.5)\n",
962
+ "plt.scatter(centroids[:, 0], centroids[:, 1], marker='x', c='red', s=200, linewidths=2)\n",
963
+ "plt.title('KMeans Clustering')\n",
964
+ "plt.xlabel('Principal component 1')\n",
965
+ "plt.ylabel('Principal component 2')\n",
966
+ "plt.show()\n"
967
+ ]
968
+ },
969
+ {
970
+ "cell_type": "code",
971
+ "execution_count": null,
972
+ "metadata": {},
973
+ "outputs": [],
974
+ "source": [
975
+ "from sklearn.mixture import GaussianMixture\n",
976
+ "import numpy as np\n",
977
+ "import matplotlib.pyplot as plt\n",
978
+ "from sklearn.decomposition import PCA\n",
979
+ "\n",
980
+ "# Seed the global NumPy RNG; no random data is generated here, X is the window statistics\n",
981
+ "np.random.seed(0)\n",
982
+ "X = processed_data\n",
983
+ "\n",
984
+ "# Creating the GMM instance; random_state makes the fit repeatable regardless of cell run order\n",
985
+ "gmm = GaussianMixture(n_components=3, init_params='k-means++', random_state=0)\n",
986
+ "\n",
987
+ "# Fitting the model to the data and assigning each row to a component\n",
988
+ "gmm.fit(X)\n",
989
+ "labels = gmm.predict(X)\n",
990
+ "\n",
991
+ "# Project to 2-D for plotting only; the mixture above was fitted on the unprojected X\n",
992
+ "pca = PCA(n_components=2)\n",
993
+ "X = pca.fit_transform(X)\n",
994
+ "\n",
995
+ "\n",
996
+ "\n",
997
+ "\n",
998
+ "# Plotting the data points with colors representing different clusters\n",
999
+ "plt.scatter(X[:, 0], X[:, 1], c=labels, cmap='viridis', alpha=0.5)\n",
1000
+ "plt.title('GMM Clustering')\n",
1001
+ "plt.xlabel('Principal component 1')\n",
1002
+ "plt.ylabel('Principal component 2')\n",
1003
+ "plt.show()\n",
1004
+ "\n"
1005
+ ]
1006
+ },
1007
+ {
1008
+ "cell_type": "code",
1009
+ "execution_count": null,
1010
+ "metadata": {},
1011
+ "outputs": [
1012
+ {
1013
+ "data": {
1014
+ "text/plain": [
1015
+ "(181982, 15)"
1016
+ ]
1017
+ },
1018
+ "execution_count": 26,
1019
+ "metadata": {},
1020
+ "output_type": "execute_result"
1021
+ }
1022
+ ],
1023
+ "source": [
1024
+ "testdataset.shape"
1025
+ ]
1026
+ },
1027
+ {
1028
+ "cell_type": "code",
1029
+ "execution_count": null,
1030
+ "metadata": {},
1031
+ "outputs": [
1032
+ {
1033
+ "data": {
1034
+ "text/plain": [
1035
+ "(181951, 8)"
1036
+ ]
1037
+ },
1038
+ "execution_count": 28,
1039
+ "metadata": {},
1040
+ "output_type": "execute_result"
1041
+ }
1042
+ ],
1043
+ "source": [
1044
+ "test_predict1.shape"
1045
+ ]
1046
+ },
1047
+ {
1048
+ "cell_type": "code",
1049
+ "execution_count": null,
1050
+ "metadata": {},
1051
+ "outputs": [
1052
+ {
1053
+ "data": {
1054
+ "text/plain": [
1055
+ "array([108.04575472, 65.85715493, 47.79928153, 71.09534962,\n",
1056
+ " 56.33539828, 67.06136834, 73.87258151, 51.46057509,\n",
1057
+ " 32.91318188, 28.12291834, 13.58804695, 13.24250204,\n",
1058
+ " 6.3366788 , 66.41283778, 176.8329019 ])"
1059
+ ]
1060
+ },
1061
+ "execution_count": 30,
1062
+ "metadata": {},
1063
+ "output_type": "execute_result"
1064
+ }
1065
+ ],
1066
+ "source": [
1067
+ "scaler.mean_"
1068
+ ]
1069
+ },
1070
+ {
1071
+ "cell_type": "code",
1072
+ "execution_count": null,
1073
+ "metadata": {},
1074
+ "outputs": [
1075
+ {
1076
+ "data": {
1077
+ "text/plain": [
1078
+ "array([2.23555351e+02, 4.88454343e+00, 6.76207201e+02, 3.86856317e+00,\n",
1079
+ " 6.72235289e+01, 7.04553897e+00, 2.03829988e+02, 1.46671335e+02,\n",
1080
+ " 1.53229114e+02, 1.01090815e+02, 2.37177860e+01, 1.97707428e+01,\n",
1081
+ " 2.76565556e+01, 4.60824153e+02, 6.83930692e+04])"
1082
+ ]
1083
+ },
1084
+ "execution_count": 31,
1085
+ "metadata": {},
1086
+ "output_type": "execute_result"
1087
+ }
1088
+ ],
1089
+ "source": [
1090
+ "scaler.var_"
1091
+ ]
1092
+ },
1093
+ {
1094
+ "cell_type": "code",
1095
+ "execution_count": null,
1096
+ "metadata": {},
1097
+ "outputs": [
1098
+ {
1099
+ "data": {
1100
+ "text/plain": [
1101
+ "array([[109.83607997, 65.7232677 , 102.42839746, ..., 67.14066092,\n",
1102
+ " 90.56450819, 66.22438437],\n",
1103
+ " [100.28441846, 66.40819637, 123.52383974, ..., 68.39884677,\n",
1104
+ " 71.74945776, 60.3140524 ],\n",
1105
+ " [100.83776313, 65.46071865, -55.82973994, ..., 66.55045523,\n",
1106
+ " 64.49064254, 66.48224704],\n",
1107
+ " ...,\n",
1108
+ " [ 70.86386298, 65.98717901, 118.99624806, ..., 67.35991191,\n",
1109
+ " 43.36234531, 29.05084393],\n",
1110
+ " [ 71.26526339, 65.9891675 , 118.33246354, ..., 67.25223838,\n",
1111
+ " 50.88386299, 46.49937637],\n",
1112
+ " [ 71.28495765, 65.85019898, 114.35237621, ..., 67.29575831,\n",
1113
+ " 40.09704965, 20.1328048 ]])"
1114
+ ]
1115
+ },
1116
+ "execution_count": 34,
1117
+ "metadata": {},
1118
+ "output_type": "execute_result"
1119
+ }
1120
+ ],
1121
+ "source": []
1122
+ },
1123
+ {
1124
+ "cell_type": "code",
1125
+ "execution_count": null,
1126
+ "metadata": {},
1127
+ "outputs": [],
1128
+ "source": [
1129
+ "from sklearn.cluster import KMeans\n",
1130
+ "import numpy as np\n",
1131
+ "import matplotlib.pyplot as plt\n",
1132
+ "np.random.seed(0)  # repeatable clustering; no random data is generated here\n",
1133
+ "k = 6\n",
1134
+ "# Residuals in original units: multiply by the per-column std (scale_), not the variance (var_); the mean cancels in a difference\n",
1135
+ "# NOTE(review): assumes test_predict1 and y_test are both in the scaler's standardized space, first 8 columns - confirm\n",
1136
+ "X = (test_predict1 - y_test) * scaler.scale_[0:8]\n",
1137
+ "kmeans = KMeans(n_clusters=k, random_state=0)\n",
1138
+ "\n",
1139
+ "kmeans.fit(X)\n",
1140
+ "\n",
1141
+ "# Project to 2-D for plotting only; the clustering above ran on the unprojected X\n",
1142
+ "pca = PCA(n_components=2)\n",
1143
+ "X = pca.fit_transform(X)\n",
1144
+ "\n",
1145
+ "\n",
1146
+ "\n",
1147
+ "# Cluster centers pushed through the same PCA projection, plus the per-row labels\n",
1148
+ "centroids = kmeans.cluster_centers_\n",
1149
+ "centroids = pca.transform(centroids)\n",
1150
+ "labels = kmeans.labels_\n",
1151
+ "\n",
1152
+ "# Plotting the data points and cluster centers\n",
1153
+ "plt.scatter(X[:, 0], X[:, 1], c=labels, cmap='viridis', alpha=0.5)\n",
1154
+ "plt.scatter(centroids[:, 0], centroids[:, 1], marker='x', c='red', s=200, linewidths=2)\n",
1155
+ "plt.title('KMeans Clustering')\n",
1156
+ "plt.xlabel('Principal component 1')\n",
1157
+ "plt.ylabel('Principal component 2')\n",
1158
+ "plt.show()\n"
1159
+ ]
1160
+ },
1161
+ {
1162
+ "cell_type": "code",
1163
+ "execution_count": null,
1164
+ "metadata": {},
1165
+ "outputs": [],
1166
+ "source": []
1167
+ }
1168
+ ],
1169
+ "metadata": {
1170
+ "kernelspec": {
1171
+ "display_name": "tensorflow",
1172
+ "language": "python",
1173
+ "name": "python3"
1174
+ },
1175
+ "language_info": {
1176
+ "codemirror_mode": {
1177
+ "name": "ipython",
1178
+ "version": 3
1179
+ },
1180
+ "file_extension": ".py",
1181
+ "mimetype": "text/x-python",
1182
+ "name": "python",
1183
+ "nbconvert_exporter": "python",
1184
+ "pygments_lexer": "ipython3",
1185
+ "version": "3.11.8"
1186
+ }
1187
+ },
1188
+ "nbformat": 4,
1189
+ "nbformat_minor": 2
1190
+ }