jerin commited on
Commit
d81a75f
1 Parent(s): 81c7365

update lstm

Browse files
Files changed (1) hide show
  1. lstm.ipynb +1153 -253
lstm.ipynb CHANGED
@@ -2,7 +2,7 @@
2
  "cells": [
3
  {
4
  "cell_type": "code",
5
- "execution_count": 1,
6
  "metadata": {},
7
  "outputs": [],
8
  "source": [
@@ -16,7 +16,7 @@
16
  "from keras.models import Sequential\n",
17
  "from keras.layers import LSTM, Dense\n",
18
  "from sklearn.model_selection import train_test_split\n",
19
- "from sklearn.preprocessing import MinMaxScaler\n",
20
  "from keras.callbacks import ModelCheckpoint\n"
21
  ]
22
  },
@@ -463,7 +463,7 @@
463
  },
464
  {
465
  "cell_type": "code",
466
- "execution_count": 3,
467
  "metadata": {},
468
  "outputs": [
469
  {
@@ -502,7 +502,7 @@
502
  "dtype: int64"
503
  ]
504
  },
505
- "execution_count": 3,
506
  "metadata": {},
507
  "output_type": "execute_result"
508
  }
@@ -782,7 +782,7 @@
782
  },
783
  {
784
  "cell_type": "code",
785
- "execution_count": 3,
786
  "metadata": {},
787
  "outputs": [
788
  {
@@ -807,24 +807,13 @@
807
  " <tr style=\"text-align: right;\">\n",
808
  " <th></th>\n",
809
  " <th>date</th>\n",
810
- " <th>rtu_001_sat_sp_tn</th>\n",
811
- " <th>rtu_002_sat_sp_tn</th>\n",
812
- " <th>rtu_003_sat_sp_tn</th>\n",
813
- " <th>rtu_004_sat_sp_tn</th>\n",
814
- " <th>rtu_001_fltrd_sa_flow_tn</th>\n",
815
- " <th>rtu_002_fltrd_sa_flow_tn</th>\n",
816
- " <th>rtu_003_fltrd_sa_flow_tn</th>\n",
817
- " <th>rtu_004_fltrd_sa_flow_tn</th>\n",
818
- " <th>rtu_001_sa_temp</th>\n",
819
- " <th>...</th>\n",
820
- " <th>rtu_001_fltrd_gnd_lvl_plenum_press_tn</th>\n",
821
- " <th>rtu_002_fltrd_gnd_lvl_plenum_press_tn</th>\n",
822
- " <th>rtu_003_fltrd_gnd_lvl_plenum_press_tn</th>\n",
823
- " <th>rtu_004_fltrd_gnd_lvl_plenum_press_tn</th>\n",
824
- " <th>rtu_001_fltrd_lvl2_plenum_press_tn</th>\n",
825
- " <th>rtu_002_fltrd_lvl2_plenum_press_tn</th>\n",
826
- " <th>rtu_003_fltrd_lvl2_plenum_press_tn</th>\n",
827
- " <th>rtu_004_fltrd_lvl2_plenum_press_tn</th>\n",
828
  " <th>hvac_N</th>\n",
829
  " <th>hvac_S</th>\n",
830
  " </tr>\n",
@@ -833,101 +822,48 @@
833
  " <tr>\n",
834
  " <th>0</th>\n",
835
  " <td>2018-01-01 00:00:00</td>\n",
836
- " <td>68.0</td>\n",
837
- " <td>70.0</td>\n",
838
- " <td>65.0</td>\n",
839
- " <td>69.0</td>\n",
840
- " <td>14131.449</td>\n",
841
- " <td>13998.757</td>\n",
842
- " <td>13558.539</td>\n",
843
- " <td>9265.604</td>\n",
844
- " <td>67.6</td>\n",
845
- " <td>...</td>\n",
846
- " <td>0.030</td>\n",
847
- " <td>0.04</td>\n",
848
- " <td>0.04</td>\n",
849
- " <td>0.047</td>\n",
850
- " <td>0.050</td>\n",
851
- " <td>0.05</td>\n",
852
- " <td>0.05</td>\n",
853
- " <td>0.050</td>\n",
854
  " <td>NaN</td>\n",
855
  " <td>NaN</td>\n",
856
  " </tr>\n",
857
  " <tr>\n",
858
  " <th>1</th>\n",
859
  " <td>2018-01-01 00:01:00</td>\n",
860
- " <td>68.0</td>\n",
861
- " <td>70.0</td>\n",
862
- " <td>65.0</td>\n",
863
- " <td>69.0</td>\n",
864
- " <td>14164.429</td>\n",
865
- " <td>14065.259</td>\n",
866
- " <td>13592.909</td>\n",
867
- " <td>9265.604</td>\n",
868
- " <td>67.6</td>\n",
869
- " <td>...</td>\n",
870
- " <td>0.031</td>\n",
871
- " <td>0.04</td>\n",
872
- " <td>0.04</td>\n",
873
- " <td>0.043</td>\n",
874
- " <td>0.048</td>\n",
875
- " <td>0.05</td>\n",
876
- " <td>0.04</td>\n",
877
- " <td>0.046</td>\n",
878
  " <td>NaN</td>\n",
879
  " <td>NaN</td>\n",
880
  " </tr>\n",
881
  " </tbody>\n",
882
  "</table>\n",
883
- "<p>2 rows × 59 columns</p>\n",
884
  "</div>"
885
  ],
886
  "text/plain": [
887
- " date rtu_001_sat_sp_tn rtu_002_sat_sp_tn \\\n",
888
- "0 2018-01-01 00:00:00 68.0 70.0 \n",
889
- "1 2018-01-01 00:01:00 68.0 70.0 \n",
890
- "\n",
891
- " rtu_003_sat_sp_tn rtu_004_sat_sp_tn rtu_001_fltrd_sa_flow_tn \\\n",
892
- "0 65.0 69.0 14131.449 \n",
893
- "1 65.0 69.0 14164.429 \n",
894
- "\n",
895
- " rtu_002_fltrd_sa_flow_tn rtu_003_fltrd_sa_flow_tn \\\n",
896
- "0 13998.757 13558.539 \n",
897
- "1 14065.259 13592.909 \n",
898
- "\n",
899
- " rtu_004_fltrd_sa_flow_tn rtu_001_sa_temp ... \\\n",
900
- "0 9265.604 67.6 ... \n",
901
- "1 9265.604 67.6 ... \n",
902
  "\n",
903
- " rtu_001_fltrd_gnd_lvl_plenum_press_tn \\\n",
904
- "0 0.030 \n",
905
- "1 0.031 \n",
906
  "\n",
907
- " rtu_002_fltrd_gnd_lvl_plenum_press_tn \\\n",
908
- "0 0.04 \n",
909
- "1 0.04 \n",
910
- "\n",
911
- " rtu_003_fltrd_gnd_lvl_plenum_press_tn \\\n",
912
- "0 0.04 \n",
913
- "1 0.04 \n",
914
- "\n",
915
- " rtu_004_fltrd_gnd_lvl_plenum_press_tn rtu_001_fltrd_lvl2_plenum_press_tn \\\n",
916
- "0 0.047 0.050 \n",
917
- "1 0.043 0.048 \n",
918
- "\n",
919
- " rtu_002_fltrd_lvl2_plenum_press_tn rtu_003_fltrd_lvl2_plenum_press_tn \\\n",
920
- "0 0.05 0.05 \n",
921
- "1 0.05 0.04 \n",
922
- "\n",
923
- " rtu_004_fltrd_lvl2_plenum_press_tn hvac_N hvac_S \n",
924
- "0 0.050 NaN NaN \n",
925
- "1 0.046 NaN NaN \n",
926
- "\n",
927
- "[2 rows x 59 columns]"
928
  ]
929
  },
930
- "execution_count": 3,
931
  "metadata": {},
932
  "output_type": "execute_result"
933
  }
@@ -936,8 +872,11 @@
936
  "zone = [\"18\", \"25\", \"26\", \"45\", \"48\", \"55\", \"56\", \"61\",\"16\", \"17\", \"21\", \"23\", \"24\", \"46\", \"47\", \"51\", \"52\", \"53\", \"54\"]\n",
937
  "rtu = [\"rtu_001\",\"rtu_002\",\"rtu_003\",\"rtu_004\"]\n",
938
  "wing = [\"hvac_N\",\"hvac_S\"]\n",
 
 
939
  "# any(sub in col for sub in zone) or\n",
940
- "energy_data = merged[[\"date\"]+[col for col in merged.columns if any(sub in col for sub in wing) or any(sub in col for sub in rtu)]]\n",
 
941
  "df_filtered = energy_data[[col for col in energy_data.columns if 'Unnamed' not in col]]\n",
942
  "df_filtered = df_filtered[[col for col in df_filtered.columns if 'co2' not in col]]\n",
943
  "df_filtered = df_filtered[[col for col in df_filtered.columns if 'templogger' not in col]]\n",
@@ -947,7 +886,7 @@
947
  },
948
  {
949
  "cell_type": "code",
950
- "execution_count": 4,
951
  "metadata": {},
952
  "outputs": [
953
  {
@@ -968,7 +907,7 @@
968
  },
969
  {
970
  "cell_type": "code",
971
- "execution_count": 5,
972
  "metadata": {},
973
  "outputs": [
974
  {
@@ -977,7 +916,7 @@
977
  "[]"
978
  ]
979
  },
980
- "execution_count": 5,
981
  "metadata": {},
982
  "output_type": "execute_result"
983
  }
@@ -997,7 +936,7 @@
997
  },
998
  {
999
  "cell_type": "code",
1000
- "execution_count": 6,
1001
  "metadata": {},
1002
  "outputs": [],
1003
  "source": [
@@ -1011,90 +950,34 @@
1011
  },
1012
  {
1013
  "cell_type": "code",
1014
- "execution_count": 7,
1015
  "metadata": {},
1016
  "outputs": [
1017
- {
1018
- "name": "stderr",
1019
- "output_type": "stream",
1020
- "text": [
1021
- "c:\\Users\\jerin\\anaconda3\\envs\\smartbuilding\\Lib\\site-packages\\keras\\src\\layers\\rnn\\rnn.py:205: UserWarning: Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead.\n",
1022
- " super().__init__(**kwargs)\n"
1023
- ]
1024
- },
1025
  {
1026
  "name": "stdout",
1027
  "output_type": "stream",
1028
  "text": [
1029
- "Epoch 1/15\n",
1030
- "\u001b[1m3220/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 30ms/step - loss: 0.0038\n",
1031
- "Epoch 1: val_loss improved from inf to 0.00894, saving model to lstm3.keras\n",
1032
- "\u001b[1m3220/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m144s\u001b[0m 44ms/step - loss: 0.0038 - val_loss: 0.0089\n",
1033
- "Epoch 2/15\n",
1034
- "\u001b[1m3220/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 32ms/step - loss: 5.4854e-04\n",
1035
- "Epoch 2: val_loss improved from 0.00894 to 0.00529, saving model to lstm3.keras\n",
1036
- "\u001b[1m3220/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m137s\u001b[0m 43ms/step - loss: 5.4854e-04 - val_loss: 0.0053\n",
1037
- "Epoch 3/15\n",
1038
- "\u001b[1m3219/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m━\u001b[0m \u001b[1m0s\u001b[0m 30ms/step - loss: 5.0405e-04\n",
1039
- "Epoch 3: val_loss did not improve from 0.00529\n",
1040
- "\u001b[1m3220/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m129s\u001b[0m 40ms/step - loss: 5.0405e-04 - val_loss: 0.0063\n",
1041
- "Epoch 4/15\n",
1042
- "\u001b[1m3219/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m━\u001b[0m \u001b[1m0s\u001b[0m 30ms/step - loss: 4.9573e-04\n",
1043
- "Epoch 4: val_loss did not improve from 0.00529\n",
1044
- "\u001b[1m3220/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m131s\u001b[0m 41ms/step - loss: 4.9572e-04 - val_loss: 0.0061\n",
1045
- "Epoch 5/15\n",
1046
- "\u001b[1m3219/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m━\u001b[0m \u001b[1m0s\u001b[0m 31ms/step - loss: 4.9666e-04\n",
1047
- "Epoch 5: val_loss did not improve from 0.00529\n",
1048
- "\u001b[1m3220/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m135s\u001b[0m 42ms/step - loss: 4.9665e-04 - val_loss: 0.0058\n",
1049
- "Epoch 6/15\n",
1050
- "\u001b[1m3219/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m━\u001b[0m \u001b[1m0s\u001b[0m 30ms/step - loss: 4.7853e-04\n",
1051
- "Epoch 6: val_loss improved from 0.00529 to 0.00512, saving model to lstm3.keras\n",
1052
- "\u001b[1m3220/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m129s\u001b[0m 40ms/step - loss: 4.7852e-04 - val_loss: 0.0051\n",
1053
- "Epoch 7/15\n",
1054
- "\u001b[1m3220/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 29ms/step - loss: 4.3858e-04\n",
1055
- "Epoch 7: val_loss improved from 0.00512 to 0.00386, saving model to lstm3.keras\n",
1056
- "\u001b[1m3220/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m129s\u001b[0m 40ms/step - loss: 4.3859e-04 - val_loss: 0.0039\n",
1057
- "Epoch 8/15\n",
1058
- "\u001b[1m3219/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m━\u001b[0m \u001b[1m0s\u001b[0m 30ms/step - loss: 4.4643e-04\n",
1059
- "Epoch 8: val_loss improved from 0.00386 to 0.00321, saving model to lstm3.keras\n",
1060
- "\u001b[1m3220/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m129s\u001b[0m 40ms/step - loss: 4.4643e-04 - val_loss: 0.0032\n",
1061
- "Epoch 9/15\n",
1062
- "\u001b[1m3219/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m━\u001b[0m \u001b[1m0s\u001b[0m 30ms/step - loss: 4.3562e-04\n",
1063
- "Epoch 9: val_loss improved from 0.00321 to 0.00267, saving model to lstm3.keras\n",
1064
- "\u001b[1m3220/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m130s\u001b[0m 40ms/step - loss: 4.3562e-04 - val_loss: 0.0027\n",
1065
- "Epoch 10/15\n",
1066
- "\u001b[1m3219/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m━\u001b[0m \u001b[1m0s\u001b[0m 30ms/step - loss: 4.3336e-04\n",
1067
- "Epoch 10: val_loss did not improve from 0.00267\n",
1068
- "\u001b[1m3220/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━���━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m130s\u001b[0m 40ms/step - loss: 4.3336e-04 - val_loss: 0.0029\n",
1069
- "Epoch 11/15\n",
1070
- "\u001b[1m3219/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m━\u001b[0m \u001b[1m0s\u001b[0m 30ms/step - loss: 4.2932e-04\n",
1071
- "Epoch 11: val_loss did not improve from 0.00267\n",
1072
- "\u001b[1m3220/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m130s\u001b[0m 40ms/step - loss: 4.2932e-04 - val_loss: 0.0032\n",
1073
- "Epoch 12/15\n",
1074
- "\u001b[1m3220/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 30ms/step - loss: 4.1954e-04\n",
1075
- "Epoch 12: val_loss improved from 0.00267 to 0.00248, saving model to lstm3.keras\n",
1076
- "\u001b[1m3220/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m128s\u001b[0m 40ms/step - loss: 4.1954e-04 - val_loss: 0.0025\n",
1077
- "Epoch 13/15\n",
1078
- "\u001b[1m3220/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 29ms/step - loss: 4.2671e-04\n",
1079
- "Epoch 13: val_loss improved from 0.00248 to 0.00245, saving model to lstm3.keras\n",
1080
- "\u001b[1m3220/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m130s\u001b[0m 40ms/step - loss: 4.2671e-04 - val_loss: 0.0024\n",
1081
- "Epoch 14/15\n",
1082
- "\u001b[1m3219/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m━\u001b[0m \u001b[1m0s\u001b[0m 30ms/step - loss: 4.1718e-04\n",
1083
- "Epoch 14: val_loss did not improve from 0.00245\n",
1084
- "\u001b[1m3220/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m129s\u001b[0m 40ms/step - loss: 4.1717e-04 - val_loss: 0.0031\n",
1085
- "Epoch 15/15\n",
1086
- "\u001b[1m3219/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m━\u001b[0m \u001b[1m0s\u001b[0m 30ms/step - loss: 4.0550e-04\n",
1087
- "Epoch 15: val_loss did not improve from 0.00245\n",
1088
- "\u001b[1m3220/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m129s\u001b[0m 40ms/step - loss: 4.0550e-04 - val_loss: 0.0025\n"
1089
  ]
1090
  },
1091
  {
1092
  "data": {
1093
  "text/plain": [
1094
- "<keras.src.callbacks.history.History at 0x1fc4b1aecd0>"
1095
  ]
1096
  },
1097
- "execution_count": 7,
1098
  "metadata": {},
1099
  "output_type": "execute_result"
1100
  }
@@ -1103,17 +986,17 @@
1103
  "train,test = traindataset,testdataset\n",
1104
  "\n",
1105
  "def create_dataset(dataset,time_step):\n",
1106
- " x = [[] for _ in range(58)] \n",
1107
  " Y = []\n",
1108
  " for i in range(len(dataset) - time_step - 1):\n",
1109
- " for j in range(58):\n",
1110
  " x[j].append(dataset[i:(i + time_step), j])\n",
1111
- " Y.append([dataset[i + time_step, 56],dataset[i + time_step, 57]])\n",
1112
  " x= [np.array(feature_list) for feature_list in x]\n",
1113
  " Y = np.reshape(Y,(len(Y),2))\n",
1114
  " return np.stack(x,axis=2),Y\n",
1115
  "\n",
1116
- "time_step = 60\n",
1117
  "X_train, y_train = create_dataset(train, time_step)\n",
1118
  "X_test, y_test = create_dataset(test, time_step)\n",
1119
  "\n",
@@ -1121,93 +1004,36 @@
1121
  "model = Sequential()\n",
1122
  "model.add(LSTM(units=50, return_sequences=True, input_shape=(X_train.shape[1], X_train.shape[2])))\n",
1123
  "model.add(LSTM(units=50, return_sequences=True))\n",
1124
- "model.add(LSTM(units=50))\n",
1125
  "model.add(Dense(units=2))\n",
1126
  "\n",
1127
  "model.compile(optimizer='adam', loss='mean_squared_error')\n",
1128
  "\n",
1129
- "checkpoint_path = \"lstm3.keras\"\n",
1130
  "checkpoint_callback = ModelCheckpoint(filepath=checkpoint_path, monitor='val_loss', verbose=1, save_best_only=True, mode='min')\n",
1131
- "model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=15, batch_size=64, verbose=1, callbacks=[checkpoint_callback])\n"
1132
  ]
1133
  },
1134
  {
1135
  "cell_type": "code",
1136
- "execution_count": 39,
1137
  "metadata": {},
1138
- "outputs": [
1139
- {
1140
- "name": "stdout",
1141
- "output_type": "stream",
1142
- "text": [
1143
- "Epoch 1/10\n",
1144
- "\u001b[1m3218/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m━\u001b[0m \u001b[1m0s\u001b[0m 15ms/step - loss: 0.0050\n",
1145
- "Epoch 1: val_loss improved from inf to 0.03991, saving model to lstm3.keras\n",
1146
- "\u001b[1m3220/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m68s\u001b[0m 21ms/step - loss: 0.0050 - val_loss: 0.0399\n",
1147
- "Epoch 2/10\n",
1148
- "\u001b[1m3219/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m━\u001b[0m \u001b[1m0s\u001b[0m 15ms/step - loss: 0.0050\n",
1149
- "Epoch 2: val_loss did not improve from 0.03991\n",
1150
- "\u001b[1m3220/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m67s\u001b[0m 21ms/step - loss: 0.0050 - val_loss: 0.0480\n",
1151
- "Epoch 3/10\n",
1152
- "\u001b[1m3219/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m━\u001b[0m \u001b[1m0s\u001b[0m 15ms/step - loss: 0.0048\n",
1153
- "Epoch 3: val_loss did not improve from 0.03991\n",
1154
- "\u001b[1m3220/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m67s\u001b[0m 21ms/step - loss: 0.0048 - val_loss: 0.0474\n",
1155
- "Epoch 4/10\n",
1156
- "\u001b[1m3220/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 15ms/step - loss: 0.0047\n",
1157
- "Epoch 4: val_loss did not improve from 0.03991\n",
1158
- "\u001b[1m3220/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m68s\u001b[0m 21ms/step - loss: 0.0047 - val_loss: 0.0492\n",
1159
- "Epoch 5/10\n",
1160
- "\u001b[1m3217/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m━\u001b[0m \u001b[1m0s\u001b[0m 15ms/step - loss: 0.0048\n",
1161
- "Epoch 5: val_loss improved from 0.03991 to 0.03753, saving model to lstm3.keras\n",
1162
- "\u001b[1m3220/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m68s\u001b[0m 21ms/step - loss: 0.0048 - val_loss: 0.0375\n",
1163
- "Epoch 6/10\n",
1164
- "\u001b[1m3219/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m━\u001b[0m \u001b[1m0s\u001b[0m 15ms/step - loss: 0.0046\n",
1165
- "Epoch 6: val_loss did not improve from 0.03753\n",
1166
- "\u001b[1m3220/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m68s\u001b[0m 21ms/step - loss: 0.0046 - val_loss: 0.0466\n",
1167
- "Epoch 7/10\n",
1168
- "\u001b[1m3219/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m━\u001b[0m \u001b[1m0s\u001b[0m 15ms/step - loss: 0.0043\n",
1169
- "Epoch 7: val_loss did not improve from 0.03753\n",
1170
- "\u001b[1m3220/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m67s\u001b[0m 21ms/step - loss: 0.0043 - val_loss: 0.0499\n",
1171
- "Epoch 8/10\n",
1172
- "\u001b[1m3219/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m━\u001b[0m \u001b[1m0s\u001b[0m 15ms/step - loss: 0.0043\n",
1173
- "Epoch 8: val_loss did not improve from 0.03753\n",
1174
- "\u001b[1m3220/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m68s\u001b[0m 21ms/step - loss: 0.0043 - val_loss: 0.0483\n",
1175
- "Epoch 9/10\n",
1176
- "\u001b[1m3218/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m━\u001b[0m \u001b[1m0s\u001b[0m 15ms/step - loss: 0.0042\n",
1177
- "Epoch 9: val_loss did not improve from 0.03753\n",
1178
- "\u001b[1m3220/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m69s\u001b[0m 22ms/step - loss: 0.0042 - val_loss: 0.0559\n",
1179
- "Epoch 10/10\n",
1180
- "\u001b[1m3218/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m━\u001b[0m \u001b[1m0s\u001b[0m 15ms/step - loss: 0.0044\n",
1181
- "Epoch 10: val_loss did not improve from 0.03753\n",
1182
- "\u001b[1m3220/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m68s\u001b[0m 21ms/step - loss: 0.0044 - val_loss: 0.0470\n"
1183
- ]
1184
- },
1185
- {
1186
- "data": {
1187
- "text/plain": [
1188
- "<keras.src.callbacks.history.History at 0x153b37086d0>"
1189
- ]
1190
- },
1191
- "execution_count": 39,
1192
- "metadata": {},
1193
- "output_type": "execute_result"
1194
- }
1195
- ],
1196
  "source": [
1197
- "checkpoint_callback = ModelCheckpoint(filepath=checkpoint_path, monitor='val_loss', verbose=1, save_best_only=True, mode='min')\n",
1198
- "model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=10, batch_size=64, verbose=1, callbacks=[checkpoint_callback])"
1199
  ]
1200
  },
1201
  {
1202
  "cell_type": "code",
1203
- "execution_count": 8,
1204
  "metadata": {},
1205
  "outputs": [
1206
  {
1207
  "name": "stdout",
1208
  "output_type": "stream",
1209
  "text": [
1210
- "\u001b[1m6344/6344\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m47s\u001b[0m 7ms/step\n"
1211
  ]
1212
  }
1213
  ],
@@ -1218,14 +1044,14 @@
1218
  },
1219
  {
1220
  "cell_type": "code",
1221
- "execution_count": 10,
1222
  "metadata": {},
1223
  "outputs": [],
1224
  "source": [
1225
  "%matplotlib qt\n",
1226
- "var = 0\n",
1227
- "plt.plot(testdataset_df['date'][61:],y_test[:,0], label='Original Testing Data', color='blue')\n",
1228
- "plt.plot(testdataset_df['date'][61:],test_predict1[:,0], label='Predicted Testing Data', color='red',alpha=0.8)\n",
1229
  "# anomalies = np.where(abs(test_predict[:,var] - y_test[:,var]) > 0.38)[0]\n",
1230
  "# plt.scatter(anomalies,test_predict[anomalies,var], color='black',marker =\"o\",s=100 )\n",
1231
  "\n",
@@ -1239,7 +1065,7 @@
1239
  },
1240
  {
1241
  "cell_type": "code",
1242
- "execution_count": 11,
1243
  "metadata": {},
1244
  "outputs": [
1245
  {
@@ -1251,8 +1077,8 @@
1251
  }
1252
  ],
1253
  "source": [
1254
- "from tensorflow.keras.models import load_model\n",
1255
- "model.save(\"energy_model.h5\") "
1256
  ]
1257
  },
1258
  {
@@ -1278,6 +1104,1080 @@
1278
  "plt.plot(df_filtered['date'],df_filtered['zone_025_temp'])"
1279
  ]
1280
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1281
  {
1282
  "cell_type": "code",
1283
  "execution_count": null,
 
2
  "cells": [
3
  {
4
  "cell_type": "code",
5
+ "execution_count": 181,
6
  "metadata": {},
7
  "outputs": [],
8
  "source": [
 
16
  "from keras.models import Sequential\n",
17
  "from keras.layers import LSTM, Dense\n",
18
  "from sklearn.model_selection import train_test_split\n",
19
+ "from sklearn.preprocessing import MinMaxScaler,StandardScaler\n",
20
  "from keras.callbacks import ModelCheckpoint\n"
21
  ]
22
  },
 
463
  },
464
  {
465
  "cell_type": "code",
466
+ "execution_count": 7,
467
  "metadata": {},
468
  "outputs": [
469
  {
 
502
  "dtype: int64"
503
  ]
504
  },
505
+ "execution_count": 7,
506
  "metadata": {},
507
  "output_type": "execute_result"
508
  }
 
782
  },
783
  {
784
  "cell_type": "code",
785
+ "execution_count": 19,
786
  "metadata": {},
787
  "outputs": [
788
  {
 
807
  " <tr style=\"text-align: right;\">\n",
808
  " <th></th>\n",
809
  " <th>date</th>\n",
810
+ " <th>air_temp_set_1</th>\n",
811
+ " <th>air_temp_set_2</th>\n",
812
+ " <th>dew_point_temperature_set_1d</th>\n",
813
+ " <th>relative_humidity_set_1</th>\n",
814
+ " <th>solar_radiation_set_1</th>\n",
815
+ " <th>wifi_third_south</th>\n",
816
+ " <th>wifi_fourth_south</th>\n",
 
 
 
 
 
 
 
 
 
 
 
817
  " <th>hvac_N</th>\n",
818
  " <th>hvac_S</th>\n",
819
  " </tr>\n",
 
822
  " <tr>\n",
823
  " <th>0</th>\n",
824
  " <td>2018-01-01 00:00:00</td>\n",
825
+ " <td>11.64</td>\n",
826
+ " <td>11.51</td>\n",
827
+ " <td>8.1</td>\n",
828
+ " <td>79.07</td>\n",
829
+ " <td>86.7</td>\n",
830
+ " <td>NaN</td>\n",
831
+ " <td>NaN</td>\n",
 
 
 
 
 
 
 
 
 
 
 
832
  " <td>NaN</td>\n",
833
  " <td>NaN</td>\n",
834
  " </tr>\n",
835
  " <tr>\n",
836
  " <th>1</th>\n",
837
  " <td>2018-01-01 00:01:00</td>\n",
838
+ " <td>11.64</td>\n",
839
+ " <td>11.51</td>\n",
840
+ " <td>8.1</td>\n",
841
+ " <td>79.07</td>\n",
842
+ " <td>86.7</td>\n",
843
+ " <td>NaN</td>\n",
844
+ " <td>NaN</td>\n",
 
 
 
 
 
 
 
 
 
 
 
845
  " <td>NaN</td>\n",
846
  " <td>NaN</td>\n",
847
  " </tr>\n",
848
  " </tbody>\n",
849
  "</table>\n",
 
850
  "</div>"
851
  ],
852
  "text/plain": [
853
+ " date air_temp_set_1 air_temp_set_2 \\\n",
854
+ "0 2018-01-01 00:00:00 11.64 11.51 \n",
855
+ "1 2018-01-01 00:01:00 11.64 11.51 \n",
 
 
 
 
 
 
 
 
 
 
 
 
856
  "\n",
857
+ " dew_point_temperature_set_1d relative_humidity_set_1 \\\n",
858
+ "0 8.1 79.07 \n",
859
+ "1 8.1 79.07 \n",
860
  "\n",
861
+ " solar_radiation_set_1 wifi_third_south wifi_fourth_south hvac_N hvac_S \n",
862
+ "0 86.7 NaN NaN NaN NaN \n",
863
+ "1 86.7 NaN NaN NaN NaN "
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
864
  ]
865
  },
866
+ "execution_count": 19,
867
  "metadata": {},
868
  "output_type": "execute_result"
869
  }
 
872
  "zone = [\"18\", \"25\", \"26\", \"45\", \"48\", \"55\", \"56\", \"61\",\"16\", \"17\", \"21\", \"23\", \"24\", \"46\", \"47\", \"51\", \"52\", \"53\", \"54\"]\n",
873
  "rtu = [\"rtu_001\",\"rtu_002\",\"rtu_003\",\"rtu_004\"]\n",
874
  "wing = [\"hvac_N\",\"hvac_S\"]\n",
875
+ "env = [\"air_temp_set_1\",\"air_temp_set_2\",\"dew_point_temperature_set_1d\",\"relative_humidity_set_1\",\"solar_radiation_set_1\"]\n",
876
+ "wifi=[\"wifi_third_south\",\"wifi_fourth_south\"]\n",
877
  "# any(sub in col for sub in zone) or\n",
878
+ "energy_data = merged[[\"date\"]+[col for col in merged.columns if \n",
879
+ " any(sub in col for sub in env) or any(sub in col for sub in wifi)]+wing]\n",
880
  "df_filtered = energy_data[[col for col in energy_data.columns if 'Unnamed' not in col]]\n",
881
  "df_filtered = df_filtered[[col for col in df_filtered.columns if 'co2' not in col]]\n",
882
  "df_filtered = df_filtered[[col for col in df_filtered.columns if 'templogger' not in col]]\n",
 
886
  },
887
  {
888
  "cell_type": "code",
889
+ "execution_count": 20,
890
  "metadata": {},
891
  "outputs": [
892
  {
 
907
  },
908
  {
909
  "cell_type": "code",
910
+ "execution_count": 21,
911
  "metadata": {},
912
  "outputs": [
913
  {
 
916
  "[]"
917
  ]
918
  },
919
+ "execution_count": 21,
920
  "metadata": {},
921
  "output_type": "execute_result"
922
  }
 
936
  },
937
  {
938
  "cell_type": "code",
939
+ "execution_count": 22,
940
  "metadata": {},
941
  "outputs": [],
942
  "source": [
 
950
  },
951
  {
952
  "cell_type": "code",
953
+ "execution_count": 26,
954
  "metadata": {},
955
  "outputs": [
 
 
 
 
 
 
 
 
956
  {
957
  "name": "stdout",
958
  "output_type": "stream",
959
  "text": [
960
+ "Epoch 1/3\n",
961
+ "\u001b[1m3219/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m━\u001b[0m \u001b[1m0s\u001b[0m 14ms/step - loss: 0.0036\n",
962
+ "Epoch 1: val_loss improved from inf to 0.00068, saving model to lstm_energy_01.keras\n",
963
+ "\u001b[1m3220/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m65s\u001b[0m 20ms/step - loss: 0.0036 - val_loss: 6.8049e-04\n",
964
+ "Epoch 2/3\n",
965
+ "\u001b[1m3219/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m━\u001b[0m \u001b[1m0s\u001b[0m 14ms/step - loss: 4.8055e-04\n",
966
+ "Epoch 2: val_loss improved from 0.00068 to 0.00064, saving model to lstm_energy_01.keras\n",
967
+ "\u001b[1m3220/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m60s\u001b[0m 19ms/step - loss: 4.8055e-04 - val_loss: 6.4225e-04\n",
968
+ "Epoch 3/3\n",
969
+ "\u001b[1m3219/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m━\u001b[0m \u001b[1m0s\u001b[0m 14ms/step - loss: 4.6623e-04\n",
970
+ "Epoch 3: val_loss improved from 0.00064 to 0.00061, saving model to lstm_energy_01.keras\n",
971
+ "\u001b[1m3220/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m61s\u001b[0m 19ms/step - loss: 4.6622e-04 - val_loss: 6.0579e-04\n"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
972
  ]
973
  },
974
  {
975
  "data": {
976
  "text/plain": [
977
+ "<keras.src.callbacks.history.History at 0x1ea4d2ed650>"
978
  ]
979
  },
980
+ "execution_count": 26,
981
  "metadata": {},
982
  "output_type": "execute_result"
983
  }
 
986
  "train,test = traindataset,testdataset\n",
987
  "\n",
988
  "def create_dataset(dataset,time_step):\n",
989
+ " x = [[] for _ in range(9)] \n",
990
  " Y = []\n",
991
  " for i in range(len(dataset) - time_step - 1):\n",
992
+ " for j in range(9):\n",
993
  " x[j].append(dataset[i:(i + time_step), j])\n",
994
+ " Y.append([dataset[i + time_step, 7],dataset[i + time_step, 8]])\n",
995
  " x= [np.array(feature_list) for feature_list in x]\n",
996
  " Y = np.reshape(Y,(len(Y),2))\n",
997
  " return np.stack(x,axis=2),Y\n",
998
  "\n",
999
+ "time_step = 30\n",
1000
  "X_train, y_train = create_dataset(train, time_step)\n",
1001
  "X_test, y_test = create_dataset(test, time_step)\n",
1002
  "\n",
 
1004
  "model = Sequential()\n",
1005
  "model.add(LSTM(units=50, return_sequences=True, input_shape=(X_train.shape[1], X_train.shape[2])))\n",
1006
  "model.add(LSTM(units=50, return_sequences=True))\n",
1007
+ "model.add(LSTM(units=30))\n",
1008
  "model.add(Dense(units=2))\n",
1009
  "\n",
1010
  "model.compile(optimizer='adam', loss='mean_squared_error')\n",
1011
  "\n",
1012
+ "checkpoint_path = \"lstm_energy_01.keras\"\n",
1013
  "checkpoint_callback = ModelCheckpoint(filepath=checkpoint_path, monitor='val_loss', verbose=1, save_best_only=True, mode='min')\n",
1014
+ "model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=3, batch_size=64, verbose=1, callbacks=[checkpoint_callback])\n"
1015
  ]
1016
  },
1017
  {
1018
  "cell_type": "code",
1019
+ "execution_count": 18,
1020
  "metadata": {},
1021
+ "outputs": [],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1022
  "source": [
1023
+ "# checkpoint_callback = ModelCheckpoint(filepath=checkpoint_path, monitor='val_loss', verbose=1, save_best_only=True, mode='min')\n",
1024
+ "# model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=10, batch_size=64, verbose=1, callbacks=[checkpoint_callback])"
1025
  ]
1026
  },
1027
  {
1028
  "cell_type": "code",
1029
+ "execution_count": 27,
1030
  "metadata": {},
1031
  "outputs": [
1032
  {
1033
  "name": "stdout",
1034
  "output_type": "stream",
1035
  "text": [
1036
+ "\u001b[1m6345/6345\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m22s\u001b[0m 3ms/step\n"
1037
  ]
1038
  }
1039
  ],
 
1044
  },
1045
  {
1046
  "cell_type": "code",
1047
+ "execution_count": 30,
1048
  "metadata": {},
1049
  "outputs": [],
1050
  "source": [
1051
  "%matplotlib qt\n",
1052
+ "var = 1\n",
1053
+ "plt.plot(testdataset_df['date'][31:],y_test[:,1], label='Original Testing Data', color='blue')\n",
1054
+ "plt.plot(testdataset_df['date'][31:],test_predict1[:,1], label='Predicted Testing Data', color='red',alpha=0.8)\n",
1055
  "# anomalies = np.where(abs(test_predict[:,var] - y_test[:,var]) > 0.38)[0]\n",
1056
  "# plt.scatter(anomalies,test_predict[anomalies,var], color='black',marker =\"o\",s=100 )\n",
1057
  "\n",
 
1065
  },
1066
  {
1067
  "cell_type": "code",
1068
+ "execution_count": 31,
1069
  "metadata": {},
1070
  "outputs": [
1071
  {
 
1077
  }
1078
  ],
1079
  "source": [
1080
+ "# from tensorflow.keras.models import load_model\n",
1081
+ "# model.save(\"energy_model_01.h5\") "
1082
  ]
1083
  },
1084
  {
 
1104
  "plt.plot(df_filtered['date'],df_filtered['zone_025_temp'])"
1105
  ]
1106
  },
1107
+ {
1108
+ "cell_type": "code",
1109
+ "execution_count": 22,
1110
+ "metadata": {},
1111
+ "outputs": [
1112
+ {
1113
+ "data": {
1114
+ "text/plain": [
1115
+ "[<matplotlib.lines.Line2D at 0x1fe7e211d90>]"
1116
+ ]
1117
+ },
1118
+ "execution_count": 22,
1119
+ "metadata": {},
1120
+ "output_type": "execute_result"
1121
+ }
1122
+ ],
1123
+ "source": [
1124
+ "plt.plot(merged['hvac_S'])\n",
1125
+ "plt.plot(testdataset_df['hvac_S'])\n",
1126
+ "plt.plot(traindataset_df['hvac_S'],'r')"
1127
+ ]
1128
+ },
1129
+ {
1130
+ "cell_type": "code",
1131
+ "execution_count": 24,
1132
+ "metadata": {},
1133
+ "outputs": [
1134
+ {
1135
+ "data": {
1136
+ "text/plain": [
1137
+ "[<matplotlib.lines.Line2D at 0x1fe8ecf5bd0>]"
1138
+ ]
1139
+ },
1140
+ "execution_count": 24,
1141
+ "metadata": {},
1142
+ "output_type": "execute_result"
1143
+ }
1144
+ ],
1145
+ "source": [
1146
+ "plt.plot(merged['hvac_N'])\n",
1147
+ "plt.plot(testdataset_df['hvac_N'])\n",
1148
+ "plt.plot(traindataset_df['hvac_N'],'r')"
1149
+ ]
1150
+ },
1151
+ {
1152
+ "cell_type": "code",
1153
+ "execution_count": 32,
1154
+ "metadata": {},
1155
+ "outputs": [],
1156
+ "source": [
1157
+ "# merged.columns.to_list()"
1158
+ ]
1159
+ },
1160
+ {
1161
+ "cell_type": "code",
1162
+ "execution_count": 3,
1163
+ "metadata": {},
1164
+ "outputs": [
1165
+ {
1166
+ "name": "stdout",
1167
+ "output_type": "stream",
1168
+ "text": [
1169
+ "2.16.1\n"
1170
+ ]
1171
+ }
1172
+ ],
1173
+ "source": []
1174
+ },
1175
+ {
1176
+ "cell_type": "markdown",
1177
+ "metadata": {},
1178
+ "source": [
1179
+ "LSTM 2.0"
1180
+ ]
1181
+ },
1182
+ {
1183
+ "cell_type": "code",
1184
+ "execution_count": 39,
1185
+ "metadata": {},
1186
+ "outputs": [
1187
+ {
1188
+ "data": {
1189
+ "text/html": [
1190
+ "<div>\n",
1191
+ "<style scoped>\n",
1192
+ " .dataframe tbody tr th:only-of-type {\n",
1193
+ " vertical-align: middle;\n",
1194
+ " }\n",
1195
+ "\n",
1196
+ " .dataframe tbody tr th {\n",
1197
+ " vertical-align: top;\n",
1198
+ " }\n",
1199
+ "\n",
1200
+ " .dataframe thead th {\n",
1201
+ " text-align: right;\n",
1202
+ " }\n",
1203
+ "</style>\n",
1204
+ "<table border=\"1\" class=\"dataframe\">\n",
1205
+ " <thead>\n",
1206
+ " <tr style=\"text-align: right;\">\n",
1207
+ " <th></th>\n",
1208
+ " <th>date</th>\n",
1209
+ " <th>hp_hws_temp</th>\n",
1210
+ " <th>rtu_003_sat_sp_tn</th>\n",
1211
+ " <th>rtu_003_fltrd_sa_flow_tn</th>\n",
1212
+ " <th>rtu_003_sa_temp</th>\n",
1213
+ " <th>rtu_003_pa_static_stpt_tn</th>\n",
1214
+ " <th>rtu_003_oa_flow_tn</th>\n",
1215
+ " <th>rtu_003_oadmpr_pct</th>\n",
1216
+ " <th>rtu_003_econ_stpt_tn</th>\n",
1217
+ " <th>rtu_003_ra_temp</th>\n",
1218
+ " <th>...</th>\n",
1219
+ " <th>rtu_003_rf_vfd_spd_fbk_tn</th>\n",
1220
+ " <th>rtu_003_fltrd_gnd_lvl_plenum_press_tn</th>\n",
1221
+ " <th>rtu_003_fltrd_lvl2_plenum_press_tn</th>\n",
1222
+ " <th>wifi_third_south</th>\n",
1223
+ " <th>wifi_fourth_south</th>\n",
1224
+ " <th>air_temp_set_1</th>\n",
1225
+ " <th>air_temp_set_2</th>\n",
1226
+ " <th>dew_point_temperature_set_1d</th>\n",
1227
+ " <th>relative_humidity_set_1</th>\n",
1228
+ " <th>solar_radiation_set_1</th>\n",
1229
+ " </tr>\n",
1230
+ " </thead>\n",
1231
+ " <tbody>\n",
1232
+ " <tr>\n",
1233
+ " <th>0</th>\n",
1234
+ " <td>2018-01-01 00:00:00</td>\n",
1235
+ " <td>75.3</td>\n",
1236
+ " <td>65.0</td>\n",
1237
+ " <td>13558.539</td>\n",
1238
+ " <td>65.5</td>\n",
1239
+ " <td>0.6</td>\n",
1240
+ " <td>0.000000</td>\n",
1241
+ " <td>34.6</td>\n",
1242
+ " <td>65.0</td>\n",
1243
+ " <td>67.9</td>\n",
1244
+ " <td>...</td>\n",
1245
+ " <td>49.9</td>\n",
1246
+ " <td>0.04</td>\n",
1247
+ " <td>0.05</td>\n",
1248
+ " <td>NaN</td>\n",
1249
+ " <td>NaN</td>\n",
1250
+ " <td>11.64</td>\n",
1251
+ " <td>11.51</td>\n",
1252
+ " <td>8.1</td>\n",
1253
+ " <td>79.07</td>\n",
1254
+ " <td>86.7</td>\n",
1255
+ " </tr>\n",
1256
+ " <tr>\n",
1257
+ " <th>1</th>\n",
1258
+ " <td>2018-01-01 00:01:00</td>\n",
1259
+ " <td>75.3</td>\n",
1260
+ " <td>65.0</td>\n",
1261
+ " <td>13592.909</td>\n",
1262
+ " <td>65.6</td>\n",
1263
+ " <td>0.6</td>\n",
1264
+ " <td>5992.059572</td>\n",
1265
+ " <td>34.6</td>\n",
1266
+ " <td>65.0</td>\n",
1267
+ " <td>67.9</td>\n",
1268
+ " <td>...</td>\n",
1269
+ " <td>49.4</td>\n",
1270
+ " <td>0.04</td>\n",
1271
+ " <td>0.04</td>\n",
1272
+ " <td>NaN</td>\n",
1273
+ " <td>NaN</td>\n",
1274
+ " <td>11.64</td>\n",
1275
+ " <td>11.51</td>\n",
1276
+ " <td>8.1</td>\n",
1277
+ " <td>79.07</td>\n",
1278
+ " <td>86.7</td>\n",
1279
+ " </tr>\n",
1280
+ " </tbody>\n",
1281
+ "</table>\n",
1282
+ "<p>2 rows × 23 columns</p>\n",
1283
+ "</div>"
1284
+ ],
1285
+ "text/plain": [
1286
+ " date hp_hws_temp rtu_003_sat_sp_tn \\\n",
1287
+ "0 2018-01-01 00:00:00 75.3 65.0 \n",
1288
+ "1 2018-01-01 00:01:00 75.3 65.0 \n",
1289
+ "\n",
1290
+ " rtu_003_fltrd_sa_flow_tn rtu_003_sa_temp rtu_003_pa_static_stpt_tn \\\n",
1291
+ "0 13558.539 65.5 0.6 \n",
1292
+ "1 13592.909 65.6 0.6 \n",
1293
+ "\n",
1294
+ " rtu_003_oa_flow_tn rtu_003_oadmpr_pct rtu_003_econ_stpt_tn \\\n",
1295
+ "0 0.000000 34.6 65.0 \n",
1296
+ "1 5992.059572 34.6 65.0 \n",
1297
+ "\n",
1298
+ " rtu_003_ra_temp ... rtu_003_rf_vfd_spd_fbk_tn \\\n",
1299
+ "0 67.9 ... 49.9 \n",
1300
+ "1 67.9 ... 49.4 \n",
1301
+ "\n",
1302
+ " rtu_003_fltrd_gnd_lvl_plenum_press_tn rtu_003_fltrd_lvl2_plenum_press_tn \\\n",
1303
+ "0 0.04 0.05 \n",
1304
+ "1 0.04 0.04 \n",
1305
+ "\n",
1306
+ " wifi_third_south wifi_fourth_south air_temp_set_1 air_temp_set_2 \\\n",
1307
+ "0 NaN NaN 11.64 11.51 \n",
1308
+ "1 NaN NaN 11.64 11.51 \n",
1309
+ "\n",
1310
+ " dew_point_temperature_set_1d relative_humidity_set_1 \\\n",
1311
+ "0 8.1 79.07 \n",
1312
+ "1 8.1 79.07 \n",
1313
+ "\n",
1314
+ " solar_radiation_set_1 \n",
1315
+ "0 86.7 \n",
1316
+ "1 86.7 \n",
1317
+ "\n",
1318
+ "[2 rows x 23 columns]"
1319
+ ]
1320
+ },
1321
+ "execution_count": 39,
1322
+ "metadata": {},
1323
+ "output_type": "execute_result"
1324
+ }
1325
+ ],
1326
+ "source": [
1327
+ "rtu = [\"rtu_003\"]\n",
1328
+ "# wing = [\"hvac_N\",\"hvac_S\"]\n",
1329
+ "env = [\"air_temp_set_1\",\"air_temp_set_2\",\"dew_point_temperature_set_1d\",\"relative_humidity_set_1\",\"solar_radiation_set_1\"]\n",
1330
+ "wifi=[\"wifi_third_south\",\"wifi_fourth_south\"]\n",
1331
+ "[\"rtu_003_ma_temp\",]\n",
1332
+ "# any(sub in col for sub in zone) or\n",
1333
+ "energy_data = merged[[\"date\",\"hp_hws_temp\"]+[col for col in merged.columns if \n",
1334
+ " any(sub in col for sub in rtu) or any(sub in col for sub in wifi)]+env]\n",
1335
+ "df_filtered = energy_data[[col for col in energy_data.columns if 'Unnamed' not in col]]\n",
1336
+ "df_filtered = df_filtered[[col for col in df_filtered.columns if 'co2' not in col]]\n",
1337
+ "df_filtered = df_filtered[[col for col in df_filtered.columns if 'templogger' not in col]]\n",
1338
+ "# df_filtered = df_filtered.dropna()\n",
1339
+ "df_filtered.head(2)"
1340
+ ]
1341
+ },
1342
+ {
1343
+ "cell_type": "code",
1344
+ "execution_count": 40,
1345
+ "metadata": {},
1346
+ "outputs": [],
1347
+ "source": [
1348
+ "df_filtered['date'] = pd.to_datetime(df_filtered['date'], format = \"%Y-%m-%d %H:%M:%S\")\n",
1349
+ "df_filtered = df_filtered[ (df_filtered.date.dt.date >date(2019, 4, 1)) & (df_filtered.date.dt.date< date(2020, 2, 15))]\n",
1350
+ "# df_filtered.isna().sum()\n",
1351
+ "if df_filtered.isna().any().any():\n",
1352
+ " print(\"There are NA values in the DataFrame columns.\")"
1353
+ ]
1354
+ },
1355
+ {
1356
+ "cell_type": "code",
1357
+ "execution_count": 68,
1358
+ "metadata": {},
1359
+ "outputs": [],
1360
+ "source": [
1361
+ "df_filtered = df_filtered.loc[:,['date','hp_hws_temp',\n",
1362
+ " 'rtu_003_sa_temp',\n",
1363
+ " 'rtu_003_oadmpr_pct',\n",
1364
+ " 'rtu_003_ra_temp',\n",
1365
+ " 'rtu_003_oa_temp',\n",
1366
+ " 'rtu_003_ma_temp',\n",
1367
+ " 'rtu_003_sf_vfd_spd_fbk_tn',\n",
1368
+ " 'rtu_003_rf_vfd_spd_fbk_tn','wifi_third_south',\n",
1369
+ " 'wifi_fourth_south',\n",
1370
+ " 'air_temp_set_1',\n",
1371
+ " 'air_temp_set_2',\n",
1372
+ " 'dew_point_temperature_set_1d',\n",
1373
+ " 'relative_humidity_set_1',\n",
1374
+ " 'solar_radiation_set_1']]"
1375
+ ]
1376
+ },
1377
+ {
1378
+ "cell_type": "code",
1379
+ "execution_count": 188,
1380
+ "metadata": {},
1381
+ "outputs": [
1382
+ {
1383
+ "data": {
1384
+ "text/plain": [
1385
+ "[]"
1386
+ ]
1387
+ },
1388
+ "execution_count": 188,
1389
+ "metadata": {},
1390
+ "output_type": "execute_result"
1391
+ }
1392
+ ],
1393
+ "source": [
1394
+ "testdataset_df = df_filtered[(df_filtered.date.dt.date <date(2019, 8, 21))]\n",
1395
+ "\n",
1396
+ "traindataset_df = df_filtered[ (df_filtered.date.dt.date >date(2019, 11, 8))]\n",
1397
+ "# .ewm(com = 1000,adjust=True).mean()\n",
1398
+ "testdataset = testdataset_df.drop(columns=[\"date\"]).values\n",
1399
+ "\n",
1400
+ "traindataset = traindataset_df.drop(columns=[\"date\"]).values\n",
1401
+ "\n",
1402
+ "columns_with_na = traindataset_df.columns[traindataset_df.isna().any()].tolist()\n",
1403
+ "columns_with_na"
1404
+ ]
1405
+ },
1406
+ {
1407
+ "cell_type": "code",
1408
+ "execution_count": 189,
1409
+ "metadata": {},
1410
+ "outputs": [],
1411
+ "source": [
1412
+ "traindataset = traindataset.astype('float32')\n",
1413
+ "testdataset = testdataset.astype('float32')\n",
1414
+ "\n",
1415
+ "scaler = StandardScaler()\n",
1416
+ "traindataset = scaler.fit_transform(traindataset)\n",
1417
+ "testdataset = scaler.transform(testdataset)"
1418
+ ]
1419
+ },
1420
+ {
1421
+ "cell_type": "code",
1422
+ "execution_count": 191,
1423
+ "metadata": {},
1424
+ "outputs": [
1425
+ {
1426
+ "name": "stdout",
1427
+ "output_type": "stream",
1428
+ "text": [
1429
+ "Epoch 1/5\n"
1430
+ ]
1431
+ },
1432
+ {
1433
+ "name": "stderr",
1434
+ "output_type": "stream",
1435
+ "text": [
1436
+ "c:\\Users\\jerin\\anaconda3\\envs\\smartbuilding\\Lib\\site-packages\\keras\\src\\layers\\rnn\\rnn.py:205: UserWarning: Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead.\n",
1437
+ " super().__init__(**kwargs)\n"
1438
+ ]
1439
+ },
1440
+ {
1441
+ "name": "stdout",
1442
+ "output_type": "stream",
1443
+ "text": [
1444
+ "\u001b[1m3217/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m━\u001b[0m \u001b[1m0s\u001b[0m 14ms/step - loss: 0.1413\n",
1445
+ "Epoch 1: val_loss improved from inf to 0.52256, saving model to lstm_smooth_01.keras\n",
1446
+ "\u001b[1m3220/3220\u001b[0m \u001b[32m━━━━━━━━��━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m66s\u001b[0m 20ms/step - loss: 0.1412 - val_loss: 0.5226\n",
1447
+ "Epoch 2/5\n",
1448
+ "\u001b[1m3217/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m━\u001b[0m \u001b[1m0s\u001b[0m 15ms/step - loss: 0.0393\n",
1449
+ "Epoch 2: val_loss improved from 0.52256 to 0.50228, saving model to lstm_smooth_01.keras\n",
1450
+ "\u001b[1m3220/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m65s\u001b[0m 20ms/step - loss: 0.0393 - val_loss: 0.5023\n",
1451
+ "Epoch 3/5\n",
1452
+ "\u001b[1m3220/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 15ms/step - loss: 0.0347\n",
1453
+ "Epoch 3: val_loss improved from 0.50228 to 0.48711, saving model to lstm_smooth_01.keras\n",
1454
+ "\u001b[1m3220/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m65s\u001b[0m 20ms/step - loss: 0.0347 - val_loss: 0.4871\n",
1455
+ "Epoch 4/5\n",
1456
+ "\u001b[1m3217/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m━\u001b[0m \u001b[1m0s\u001b[0m 15ms/step - loss: 0.0319\n",
1457
+ "Epoch 4: val_loss did not improve from 0.48711\n",
1458
+ "\u001b[1m3220/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m66s\u001b[0m 20ms/step - loss: 0.0319 - val_loss: 0.4958\n",
1459
+ "Epoch 5/5\n",
1460
+ "\u001b[1m3218/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m━\u001b[0m \u001b[1m0s\u001b[0m 15ms/step - loss: 0.0303\n",
1461
+ "Epoch 5: val_loss did not improve from 0.48711\n",
1462
+ "\u001b[1m3220/3220\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m67s\u001b[0m 21ms/step - loss: 0.0303 - val_loss: 0.5026\n"
1463
+ ]
1464
+ },
1465
+ {
1466
+ "data": {
1467
+ "text/plain": [
1468
+ "<keras.src.callbacks.history.History at 0x1d460bb83d0>"
1469
+ ]
1470
+ },
1471
+ "execution_count": 191,
1472
+ "metadata": {},
1473
+ "output_type": "execute_result"
1474
+ }
1475
+ ],
1476
+ "source": [
1477
+ "train,test = traindataset,testdataset\n",
1478
+ "\n",
1479
+ "def create_dataset(dataset,time_step):\n",
1480
+ " x = [[] for _ in range(15)] \n",
1481
+ " Y = []\n",
1482
+ " for i in range(len(dataset) - time_step - 1):\n",
1483
+ " for j in range(15):\n",
1484
+ " x[j].append(dataset[i:(i + time_step), j])\n",
1485
+ " Y.append([dataset[i + time_step, 0],dataset[i + time_step, 1],dataset[i + time_step, 2],dataset[i + time_step, 3],dataset[i + time_step, 4],dataset[i + time_step, 5],\n",
1486
+ " dataset[i + time_step, 6],dataset[i + time_step, 7]])\n",
1487
+ " x= [np.array(feature_list) for feature_list in x]\n",
1488
+ " Y = np.reshape(Y,(len(Y),8))\n",
1489
+ " return np.stack(x,axis=2),Y\n",
1490
+ "\n",
1491
+ "time_step = 30\n",
1492
+ "X_train, y_train = create_dataset(train, time_step)\n",
1493
+ "X_test, y_test = create_dataset(test, time_step)\n",
1494
+ "\n",
1495
+ "\n",
1496
+ "model = Sequential()\n",
1497
+ "model.add(LSTM(units=50, return_sequences=True, input_shape=(X_train.shape[1], X_train.shape[2])))\n",
1498
+ "model.add(LSTM(units=50, return_sequences=True))\n",
1499
+ "model.add(LSTM(units=30))\n",
1500
+ "model.add(Dense(units=8))\n",
1501
+ "\n",
1502
+ "model.compile(optimizer='adam', loss='mean_squared_error')\n",
1503
+ "\n",
1504
+ "checkpoint_path = \"lstm_smooth_01.keras\"\n",
1505
+ "checkpoint_callback = ModelCheckpoint(filepath=checkpoint_path, monitor='val_loss', verbose=1, save_best_only=True, mode='min')\n",
1506
+ "model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=5, batch_size=64, verbose=1, callbacks=[checkpoint_callback])"
1507
+ ]
1508
+ },
1509
+ {
1510
+ "cell_type": "code",
1511
+ "execution_count": 192,
1512
+ "metadata": {},
1513
+ "outputs": [
1514
+ {
1515
+ "name": "stdout",
1516
+ "output_type": "stream",
1517
+ "text": [
1518
+ "\u001b[1m6345/6345\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m23s\u001b[0m 4ms/step\n"
1519
+ ]
1520
+ }
1521
+ ],
1522
+ "source": [
1523
+ "test_predict1 = model.predict(X_test)"
1524
+ ]
1525
+ },
1526
+ {
1527
+ "cell_type": "code",
1528
+ "execution_count": 193,
1529
+ "metadata": {},
1530
+ "outputs": [],
1531
+ "source": [
1532
+ "%matplotlib qt\n",
1533
+ "var = 0\n",
1534
+ "plt.plot(y_test[:,var], label='Original Testing Data', color='blue')\n",
1535
+ "plt.plot(test_predict1[:,var], label='Predicted Testing Data', color='red',alpha=0.8)\n",
1536
+ "# np.where on a 1-D boolean mask returns a 1-tuple of index arrays; always take element 0 (indexing with var fails for var != 0)\n",
+ "anomalies = np.where(abs(test_predict1[:,var] - y_test[:,var]) > 0.38)[0]\n",
1537
+ "plt.scatter(anomalies,test_predict1[anomalies,var], color='black',marker =\"o\",s=100 )\n",
1538
+ "\n",
1539
+ "\n",
1540
+ "plt.title('Testing Data - Predicted vs Actual')\n",
1541
+ "plt.xlabel('Time')\n",
1542
+ "plt.ylabel('Value')\n",
1543
+ "plt.legend()\n",
1544
+ "plt.show()"
1545
+ ]
1546
+ },
1547
+ {
1548
+ "cell_type": "code",
1549
+ "execution_count": 176,
1550
+ "metadata": {},
1551
+ "outputs": [
1552
+ {
1553
+ "data": {
1554
+ "text/plain": [
1555
+ "[<matplotlib.lines.Line2D at 0x1d334841450>]"
1556
+ ]
1557
+ },
1558
+ "execution_count": 176,
1559
+ "metadata": {},
1560
+ "output_type": "execute_result"
1561
+ }
1562
+ ],
1563
+ "source": [
1564
+ "var = 0\n",
1565
+ "plt.plot((test_predict1 - y_test)[:,var])"
1566
+ ]
1567
+ },
1568
+ {
1569
+ "cell_type": "code",
1570
+ "execution_count": 72,
1571
+ "metadata": {},
1572
+ "outputs": [],
1573
+ "source": [
1574
+ "params = ['hp_hws_temp',\n",
1575
+ " 'rtu_003_sa_temp',\n",
1576
+ " 'rtu_003_oadmpr_pct',\n",
1577
+ " 'rtu_003_ra_temp',\n",
1578
+ " 'rtu_003_oa_temp',\n",
1579
+ " 'rtu_003_ma_temp',\n",
1580
+ " 'rtu_003_sf_vfd_spd_fbk_tn',\n",
1581
+ " 'rtu_003_rf_vfd_spd_fbk_tn','wifi_third_south',\n",
1582
+ " 'wifi_fourth_south',\n",
1583
+ " 'air_temp_set_1',\n",
1584
+ " 'air_temp_set_2',\n",
1585
+ " 'dew_point_temperature_set_1d',\n",
1586
+ " 'relative_humidity_set_1',\n",
1587
+ " 'solar_radiation_set_1']\n",
1588
+ "\n",
1589
+ "idx_2_params = {}\n",
1590
+ "for i, param in enumerate(params):\n",
1591
+ " idx_2_params[i] = param"
1592
+ ]
1593
+ },
1594
+ {
1595
+ "cell_type": "code",
1596
+ "execution_count": 73,
1597
+ "metadata": {},
1598
+ "outputs": [
1599
+ {
1600
+ "data": {
1601
+ "text/plain": [
1602
+ "{0: 'hp_hws_temp',\n",
1603
+ " 1: 'rtu_003_sa_temp',\n",
1604
+ " 2: 'rtu_003_oadmpr_pct',\n",
1605
+ " 3: 'rtu_003_ra_temp',\n",
1606
+ " 4: 'rtu_003_oa_temp',\n",
1607
+ " 5: 'rtu_003_ma_temp',\n",
1608
+ " 6: 'rtu_003_sf_vfd_spd_fbk_tn',\n",
1609
+ " 7: 'rtu_003_rf_vfd_spd_fbk_tn',\n",
1610
+ " 8: 'wifi_third_south',\n",
1611
+ " 9: 'wifi_fourth_south',\n",
1612
+ " 10: 'air_temp_set_1',\n",
1613
+ " 11: 'air_temp_set_2',\n",
1614
+ " 12: 'dew_point_temperature_set_1d',\n",
1615
+ " 13: 'relative_humidity_set_1',\n",
1616
+ " 14: 'solar_radiation_set_1'}"
1617
+ ]
1618
+ },
1619
+ "execution_count": 73,
1620
+ "metadata": {},
1621
+ "output_type": "execute_result"
1622
+ }
1623
+ ],
1624
+ "source": [
1625
+ "idx_2_params"
1626
+ ]
1627
+ },
1628
+ {
1629
+ "cell_type": "markdown",
1630
+ "metadata": {},
1631
+ "source": [
1632
+ "KMEANS"
1633
+ ]
1634
+ },
1635
+ {
1636
+ "cell_type": "code",
1637
+ "execution_count": 194,
1638
+ "metadata": {},
1639
+ "outputs": [],
1640
+ "source": [
1641
+ "from sklearn.cluster import KMeans\n",
1642
+ "import numpy as np\n",
1643
+ "import matplotlib.pyplot as plt\n",
1644
+ "from sklearn.decomposition import PCA\n",
1645
+ "# Seed NumPy's global RNG; X below holds prediction residuals, not random data\n",
1646
+ "np.random.seed(0)\n",
1647
+ "X = test_predict1 - y_test\n",
1648
+ "\n",
1649
+ "pca = PCA(n_components=2)\n",
1650
+ "X = pca.fit_transform(X)\n",
1651
+ "\n",
1652
+ "\n",
1653
+ "k = 3\n",
1654
+ "\n",
1655
+ "kmeans = KMeans(n_clusters=k)\n",
1656
+ "\n",
1657
+ "kmeans.fit(X)\n",
1658
+ "\n",
1659
+ "# Getting the cluster centers and labels\n",
1660
+ "centroids = kmeans.cluster_centers_\n",
1661
+ "labels = kmeans.labels_\n",
1662
+ "\n",
1663
+ "# Plotting the data points and cluster centers\n",
1664
+ "plt.scatter(X[:, 0], X[:, 1], c=labels, cmap='viridis', alpha=0.5, edgecolors='k')\n",
1665
+ "plt.scatter(centroids[:, 0], centroids[:, 1], marker='x', c='red', s=200, linewidths=2)\n",
1666
+ "plt.title('KMeans Clustering')\n",
1667
+ "plt.xlabel('Feature 1')\n",
1668
+ "plt.ylabel('Feature 2')\n",
1669
+ "plt.show()\n"
1670
+ ]
1671
+ },
1672
+ {
1673
+ "cell_type": "code",
1674
+ "execution_count": 109,
1675
+ "metadata": {},
1676
+ "outputs": [
1677
+ {
1678
+ "data": {
1679
+ "text/plain": [
1680
+ "[<matplotlib.lines.Line2D at 0x1d33ccad250>]"
1681
+ ]
1682
+ },
1683
+ "execution_count": 109,
1684
+ "metadata": {},
1685
+ "output_type": "execute_result"
1686
+ }
1687
+ ],
1688
+ "source": [
1689
+ "plt.plot((test_predict1 - y_test)[:,2])"
1690
+ ]
1691
+ },
1692
+ {
1693
+ "cell_type": "code",
1694
+ "execution_count": 77,
1695
+ "metadata": {},
1696
+ "outputs": [
1697
+ {
1698
+ "data": {
1699
+ "text/plain": [
1700
+ "array([[ 5.8607887e-02, -2.4713947e-01, 2.4978706e-01, -7.8289807e-01,\n",
1701
+ " -2.0218764e-01, -2.8860569e-01, 2.7817219e-01, 2.4209845e-01],\n",
1702
+ " [-2.6845999e-02, 1.2596852e-01, 9.6294099e-01, 2.0099232e-01,\n",
1703
+ " 3.3391420e-02, 7.7613303e-04, -7.1204931e-02, -9.7836025e-02]],\n",
1704
+ " dtype=float32)"
1705
+ ]
1706
+ },
1707
+ "execution_count": 77,
1708
+ "metadata": {},
1709
+ "output_type": "execute_result"
1710
+ }
1711
+ ],
1712
+ "source": [
1713
+ "pca.components_"
1714
+ ]
1715
+ },
1716
+ {
1717
+ "cell_type": "code",
1718
+ "execution_count": 204,
1719
+ "metadata": {},
1720
+ "outputs": [],
1721
+ "source": [
1722
+ "k = 60\n",
1723
+ "X= test_predict1 - y_test\n",
1724
+ "processed_data = []\n",
1725
+ "feat_df = pd.DataFrame(columns=[\"mean\",\"std\",])\n",
1726
+ "for i in range(0,len(X), 30 ):\n",
1727
+ " mean = X[i:i+k].mean(axis = 0)\n",
1728
+ " std = X[i:i+k].std(axis = 0)\n",
1729
+ " win_max = X[i:i+k].max(axis = 0)  # named win_max/win_min so the builtins max/min are not shadowed in the kernel\n",
1729
+ " win_min = X[i:i+k].min(axis = 0)\n",
1730
+ " iqr = np.percentile(X[i:i+k], 75, axis=0) - np.percentile(X[i:i+k], 25,axis=0)\n",
1731
+ " data = np.concatenate([mean, std, win_max, win_min, iqr])\n",
1733
+ " processed_data.append([data])\n",
1734
+ "processed_data = np.concatenate(processed_data,axis=0) "
1735
+ ]
1736
+ },
1737
+ {
1738
+ "cell_type": "code",
1739
+ "execution_count": 197,
1740
+ "metadata": {},
1741
+ "outputs": [],
1742
+ "source": [
1743
+ "X = processed_data\n",
1744
+ "\n",
1745
+ "pca = PCA(n_components=2)\n",
1746
+ "X = pca.fit_transform(X)\n",
1747
+ "\n",
1748
+ "\n",
1749
+ "k = 4\n",
1750
+ "\n",
1751
+ "kmeans = KMeans(n_clusters=k)\n",
1752
+ "\n",
1753
+ "kmeans.fit(X)\n",
1754
+ "\n",
1755
+ "# Getting the cluster centers and labels\n",
1756
+ "centroids = kmeans.cluster_centers_\n",
1757
+ "labels = kmeans.labels_\n",
1758
+ "\n",
1759
+ "# Plotting the data points and cluster centers\n",
1760
+ "plt.scatter(X[:, 0], X[:, 1], c=labels, cmap='viridis', alpha=0.5)\n",
1761
+ "plt.scatter(centroids[:, 0], centroids[:, 1], marker='x', c='red', s=200, linewidths=2)\n",
1762
+ "plt.title('KMeans Clustering')\n",
1763
+ "plt.xlabel('Feature 1')\n",
1764
+ "plt.ylabel('Feature 2')\n",
1765
+ "plt.show()\n"
1766
+ ]
1767
+ },
1768
+ {
1769
+ "cell_type": "code",
1770
+ "execution_count": 167,
1771
+ "metadata": {},
1772
+ "outputs": [],
1773
+ "source": [
1774
+ "\n",
1775
+ "dd = df_filtered.drop(columns=[\"date\"],inplace=False)\n",
1776
+ "dg = dd.ewm(com = 1000,adjust=True).mean()"
1777
+ ]
1778
+ },
1779
+ {
1780
+ "cell_type": "code",
1781
+ "execution_count": 168,
1782
+ "metadata": {},
1783
+ "outputs": [
1784
+ {
1785
+ "data": {
1786
+ "text/html": [
1787
+ "<div>\n",
1788
+ "<style scoped>\n",
1789
+ " .dataframe tbody tr th:only-of-type {\n",
1790
+ " vertical-align: middle;\n",
1791
+ " }\n",
1792
+ "\n",
1793
+ " .dataframe tbody tr th {\n",
1794
+ " vertical-align: top;\n",
1795
+ " }\n",
1796
+ "\n",
1797
+ " .dataframe thead th {\n",
1798
+ " text-align: right;\n",
1799
+ " }\n",
1800
+ "</style>\n",
1801
+ "<table border=\"1\" class=\"dataframe\">\n",
1802
+ " <thead>\n",
1803
+ " <tr style=\"text-align: right;\">\n",
1804
+ " <th></th>\n",
1805
+ " <th>date</th>\n",
1806
+ " <th>hp_hws_temp</th>\n",
1807
+ " <th>rtu_003_sa_temp</th>\n",
1808
+ " <th>rtu_003_oadmpr_pct</th>\n",
1809
+ " <th>rtu_003_ra_temp</th>\n",
1810
+ " <th>rtu_003_oa_temp</th>\n",
1811
+ " <th>rtu_003_ma_temp</th>\n",
1812
+ " <th>rtu_003_sf_vfd_spd_fbk_tn</th>\n",
1813
+ " <th>rtu_003_rf_vfd_spd_fbk_tn</th>\n",
1814
+ " <th>wifi_third_south</th>\n",
1815
+ " <th>wifi_fourth_south</th>\n",
1816
+ " <th>air_temp_set_1</th>\n",
1817
+ " <th>air_temp_set_2</th>\n",
1818
+ " <th>dew_point_temperature_set_1d</th>\n",
1819
+ " <th>relative_humidity_set_1</th>\n",
1820
+ " <th>solar_radiation_set_1</th>\n",
1821
+ " </tr>\n",
1822
+ " </thead>\n",
1823
+ " <tbody>\n",
1824
+ " <tr>\n",
1825
+ " <th>555845</th>\n",
1826
+ " <td>2019-04-02 00:00:00</td>\n",
1827
+ " <td>120.7</td>\n",
1828
+ " <td>66.7</td>\n",
1829
+ " <td>84.4</td>\n",
1830
+ " <td>72.2</td>\n",
1831
+ " <td>59.2</td>\n",
1832
+ " <td>66.7</td>\n",
1833
+ " <td>79.6</td>\n",
1834
+ " <td>53.7</td>\n",
1835
+ " <td>34.0</td>\n",
1836
+ " <td>31.0</td>\n",
1837
+ " <td>15.67</td>\n",
1838
+ " <td>14.92</td>\n",
1839
+ " <td>11.77</td>\n",
1840
+ " <td>77.80</td>\n",
1841
+ " <td>147.1</td>\n",
1842
+ " </tr>\n",
1843
+ " <tr>\n",
1844
+ " <th>555846</th>\n",
1845
+ " <td>2019-04-02 00:01:00</td>\n",
1846
+ " <td>120.4</td>\n",
1847
+ " <td>65.8</td>\n",
1848
+ " <td>85.4</td>\n",
1849
+ " <td>72.2</td>\n",
1850
+ " <td>59.5</td>\n",
1851
+ " <td>65.0</td>\n",
1852
+ " <td>78.1</td>\n",
1853
+ " <td>54.4</td>\n",
1854
+ " <td>34.0</td>\n",
1855
+ " <td>31.0</td>\n",
1856
+ " <td>15.67</td>\n",
1857
+ " <td>14.92</td>\n",
1858
+ " <td>11.77</td>\n",
1859
+ " <td>77.80</td>\n",
1860
+ " <td>147.1</td>\n",
1861
+ " </tr>\n",
1862
+ " <tr>\n",
1863
+ " <th>555847</th>\n",
1864
+ " <td>2019-04-02 00:02:00</td>\n",
1865
+ " <td>120.1</td>\n",
1866
+ " <td>65.1</td>\n",
1867
+ " <td>66.2</td>\n",
1868
+ " <td>72.1</td>\n",
1869
+ " <td>59.4</td>\n",
1870
+ " <td>64.0</td>\n",
1871
+ " <td>78.0</td>\n",
1872
+ " <td>60.1</td>\n",
1873
+ " <td>34.0</td>\n",
1874
+ " <td>31.0</td>\n",
1875
+ " <td>15.67</td>\n",
1876
+ " <td>14.92</td>\n",
1877
+ " <td>11.77</td>\n",
1878
+ " <td>77.80</td>\n",
1879
+ " <td>147.1</td>\n",
1880
+ " </tr>\n",
1881
+ " <tr>\n",
1882
+ " <th>555848</th>\n",
1883
+ " <td>2019-04-02 00:03:00</td>\n",
1884
+ " <td>119.6</td>\n",
1885
+ " <td>64.9</td>\n",
1886
+ " <td>56.0</td>\n",
1887
+ " <td>72.2</td>\n",
1888
+ " <td>59.4</td>\n",
1889
+ " <td>65.7</td>\n",
1890
+ " <td>79.1</td>\n",
1891
+ " <td>55.5</td>\n",
1892
+ " <td>34.0</td>\n",
1893
+ " <td>31.0</td>\n",
1894
+ " <td>15.67</td>\n",
1895
+ " <td>14.92</td>\n",
1896
+ " <td>11.77</td>\n",
1897
+ " <td>77.80</td>\n",
1898
+ " <td>147.1</td>\n",
1899
+ " </tr>\n",
1900
+ " <tr>\n",
1901
+ " <th>555849</th>\n",
1902
+ " <td>2019-04-02 00:04:00</td>\n",
1903
+ " <td>119.3</td>\n",
1904
+ " <td>65.5</td>\n",
1905
+ " <td>54.6</td>\n",
1906
+ " <td>72.0</td>\n",
1907
+ " <td>59.2</td>\n",
1908
+ " <td>67.1</td>\n",
1909
+ " <td>75.2</td>\n",
1910
+ " <td>53.1</td>\n",
1911
+ " <td>34.0</td>\n",
1912
+ " <td>31.0</td>\n",
1913
+ " <td>15.67</td>\n",
1914
+ " <td>14.92</td>\n",
1915
+ " <td>11.77</td>\n",
1916
+ " <td>77.80</td>\n",
1917
+ " <td>147.1</td>\n",
1918
+ " </tr>\n",
1919
+ " <tr>\n",
1920
+ " <th>...</th>\n",
1921
+ " <td>...</td>\n",
1922
+ " <td>...</td>\n",
1923
+ " <td>...</td>\n",
1924
+ " <td>...</td>\n",
1925
+ " <td>...</td>\n",
1926
+ " <td>...</td>\n",
1927
+ " <td>...</td>\n",
1928
+ " <td>...</td>\n",
1929
+ " <td>...</td>\n",
1930
+ " <td>...</td>\n",
1931
+ " <td>...</td>\n",
1932
+ " <td>...</td>\n",
1933
+ " <td>...</td>\n",
1934
+ " <td>...</td>\n",
1935
+ " <td>...</td>\n",
1936
+ " <td>...</td>\n",
1937
+ " </tr>\n",
1938
+ " <tr>\n",
1939
+ " <th>1080190</th>\n",
1940
+ " <td>2020-02-14 23:57:00</td>\n",
1941
+ " <td>121.9</td>\n",
1942
+ " <td>67.3</td>\n",
1943
+ " <td>52.8</td>\n",
1944
+ " <td>73.2</td>\n",
1945
+ " <td>63.5</td>\n",
1946
+ " <td>69.2</td>\n",
1947
+ " <td>80.9</td>\n",
1948
+ " <td>61.3</td>\n",
1949
+ " <td>0.0</td>\n",
1950
+ " <td>0.0</td>\n",
1951
+ " <td>16.42</td>\n",
1952
+ " <td>13.93</td>\n",
1953
+ " <td>6.93</td>\n",
1954
+ " <td>53.66</td>\n",
1955
+ " <td>347.9</td>\n",
1956
+ " </tr>\n",
1957
+ " <tr>\n",
1958
+ " <th>1080191</th>\n",
1959
+ " <td>2020-02-14 23:58:00</td>\n",
1960
+ " <td>122.7</td>\n",
1961
+ " <td>69.2</td>\n",
1962
+ " <td>64.8</td>\n",
1963
+ " <td>73.3</td>\n",
1964
+ " <td>63.4</td>\n",
1965
+ " <td>70.0</td>\n",
1966
+ " <td>81.0</td>\n",
1967
+ " <td>53.8</td>\n",
1968
+ " <td>0.0</td>\n",
1969
+ " <td>0.0</td>\n",
1970
+ " <td>16.42</td>\n",
1971
+ " <td>13.93</td>\n",
1972
+ " <td>6.93</td>\n",
1973
+ " <td>53.66</td>\n",
1974
+ " <td>347.9</td>\n",
1975
+ " </tr>\n",
1976
+ " <tr>\n",
1977
+ " <th>1080192</th>\n",
1978
+ " <td>2020-02-14 23:58:00</td>\n",
1979
+ " <td>122.7</td>\n",
1980
+ " <td>69.2</td>\n",
1981
+ " <td>64.8</td>\n",
1982
+ " <td>73.3</td>\n",
1983
+ " <td>63.4</td>\n",
1984
+ " <td>70.0</td>\n",
1985
+ " <td>81.0</td>\n",
1986
+ " <td>53.8</td>\n",
1987
+ " <td>0.0</td>\n",
1988
+ " <td>0.0</td>\n",
1989
+ " <td>16.42</td>\n",
1990
+ " <td>13.93</td>\n",
1991
+ " <td>6.93</td>\n",
1992
+ " <td>53.66</td>\n",
1993
+ " <td>347.9</td>\n",
1994
+ " </tr>\n",
1995
+ " <tr>\n",
1996
+ " <th>1080193</th>\n",
1997
+ " <td>2020-02-14 23:59:00</td>\n",
1998
+ " <td>122.9</td>\n",
1999
+ " <td>68.7</td>\n",
2000
+ " <td>80.8</td>\n",
2001
+ " <td>73.3</td>\n",
2002
+ " <td>63.1</td>\n",
2003
+ " <td>67.3</td>\n",
2004
+ " <td>82.2</td>\n",
2005
+ " <td>60.1</td>\n",
2006
+ " <td>0.0</td>\n",
2007
+ " <td>0.0</td>\n",
2008
+ " <td>16.42</td>\n",
2009
+ " <td>13.93</td>\n",
2010
+ " <td>6.93</td>\n",
2011
+ " <td>53.66</td>\n",
2012
+ " <td>347.9</td>\n",
2013
+ " </tr>\n",
2014
+ " <tr>\n",
2015
+ " <th>1080194</th>\n",
2016
+ " <td>2020-02-14 23:59:00</td>\n",
2017
+ " <td>122.9</td>\n",
2018
+ " <td>68.7</td>\n",
2019
+ " <td>80.8</td>\n",
2020
+ " <td>73.3</td>\n",
2021
+ " <td>63.1</td>\n",
2022
+ " <td>67.3</td>\n",
2023
+ " <td>82.2</td>\n",
2024
+ " <td>60.1</td>\n",
2025
+ " <td>0.0</td>\n",
2026
+ " <td>0.0</td>\n",
2027
+ " <td>16.42</td>\n",
2028
+ " <td>13.93</td>\n",
2029
+ " <td>6.93</td>\n",
2030
+ " <td>53.66</td>\n",
2031
+ " <td>347.9</td>\n",
2032
+ " </tr>\n",
2033
+ " </tbody>\n",
2034
+ "</table>\n",
2035
+ "<p>524350 rows × 16 columns</p>\n",
2036
+ "</div>"
2037
+ ],
2038
+ "text/plain": [
2039
+ " date hp_hws_temp rtu_003_sa_temp rtu_003_oadmpr_pct \\\n",
2040
+ "555845 2019-04-02 00:00:00 120.7 66.7 84.4 \n",
2041
+ "555846 2019-04-02 00:01:00 120.4 65.8 85.4 \n",
2042
+ "555847 2019-04-02 00:02:00 120.1 65.1 66.2 \n",
2043
+ "555848 2019-04-02 00:03:00 119.6 64.9 56.0 \n",
2044
+ "555849 2019-04-02 00:04:00 119.3 65.5 54.6 \n",
2045
+ "... ... ... ... ... \n",
2046
+ "1080190 2020-02-14 23:57:00 121.9 67.3 52.8 \n",
2047
+ "1080191 2020-02-14 23:58:00 122.7 69.2 64.8 \n",
2048
+ "1080192 2020-02-14 23:58:00 122.7 69.2 64.8 \n",
2049
+ "1080193 2020-02-14 23:59:00 122.9 68.7 80.8 \n",
2050
+ "1080194 2020-02-14 23:59:00 122.9 68.7 80.8 \n",
2051
+ "\n",
2052
+ " rtu_003_ra_temp rtu_003_oa_temp rtu_003_ma_temp \\\n",
2053
+ "555845 72.2 59.2 66.7 \n",
2054
+ "555846 72.2 59.5 65.0 \n",
2055
+ "555847 72.1 59.4 64.0 \n",
2056
+ "555848 72.2 59.4 65.7 \n",
2057
+ "555849 72.0 59.2 67.1 \n",
2058
+ "... ... ... ... \n",
2059
+ "1080190 73.2 63.5 69.2 \n",
2060
+ "1080191 73.3 63.4 70.0 \n",
2061
+ "1080192 73.3 63.4 70.0 \n",
2062
+ "1080193 73.3 63.1 67.3 \n",
2063
+ "1080194 73.3 63.1 67.3 \n",
2064
+ "\n",
2065
+ " rtu_003_sf_vfd_spd_fbk_tn rtu_003_rf_vfd_spd_fbk_tn \\\n",
2066
+ "555845 79.6 53.7 \n",
2067
+ "555846 78.1 54.4 \n",
2068
+ "555847 78.0 60.1 \n",
2069
+ "555848 79.1 55.5 \n",
2070
+ "555849 75.2 53.1 \n",
2071
+ "... ... ... \n",
2072
+ "1080190 80.9 61.3 \n",
2073
+ "1080191 81.0 53.8 \n",
2074
+ "1080192 81.0 53.8 \n",
2075
+ "1080193 82.2 60.1 \n",
2076
+ "1080194 82.2 60.1 \n",
2077
+ "\n",
2078
+ " wifi_third_south wifi_fourth_south air_temp_set_1 air_temp_set_2 \\\n",
2079
+ "555845 34.0 31.0 15.67 14.92 \n",
2080
+ "555846 34.0 31.0 15.67 14.92 \n",
2081
+ "555847 34.0 31.0 15.67 14.92 \n",
2082
+ "555848 34.0 31.0 15.67 14.92 \n",
2083
+ "555849 34.0 31.0 15.67 14.92 \n",
2084
+ "... ... ... ... ... \n",
2085
+ "1080190 0.0 0.0 16.42 13.93 \n",
2086
+ "1080191 0.0 0.0 16.42 13.93 \n",
2087
+ "1080192 0.0 0.0 16.42 13.93 \n",
2088
+ "1080193 0.0 0.0 16.42 13.93 \n",
2089
+ "1080194 0.0 0.0 16.42 13.93 \n",
2090
+ "\n",
2091
+ " dew_point_temperature_set_1d relative_humidity_set_1 \\\n",
2092
+ "555845 11.77 77.80 \n",
2093
+ "555846 11.77 77.80 \n",
2094
+ "555847 11.77 77.80 \n",
2095
+ "555848 11.77 77.80 \n",
2096
+ "555849 11.77 77.80 \n",
2097
+ "... ... ... \n",
2098
+ "1080190 6.93 53.66 \n",
2099
+ "1080191 6.93 53.66 \n",
2100
+ "1080192 6.93 53.66 \n",
2101
+ "1080193 6.93 53.66 \n",
2102
+ "1080194 6.93 53.66 \n",
2103
+ "\n",
2104
+ " solar_radiation_set_1 \n",
2105
+ "555845 147.1 \n",
2106
+ "555846 147.1 \n",
2107
+ "555847 147.1 \n",
2108
+ "555848 147.1 \n",
2109
+ "555849 147.1 \n",
2110
+ "... ... \n",
2111
+ "1080190 347.9 \n",
2112
+ "1080191 347.9 \n",
2113
+ "1080192 347.9 \n",
2114
+ "1080193 347.9 \n",
2115
+ "1080194 347.9 \n",
2116
+ "\n",
2117
+ "[524350 rows x 16 columns]"
2118
+ ]
2119
+ },
2120
+ "execution_count": 168,
2121
+ "metadata": {},
2122
+ "output_type": "execute_result"
2123
+ }
2124
+ ],
2125
+ "source": [
2126
+ "df_filtered"
2127
+ ]
2128
+ },
2129
+ {
2130
+ "cell_type": "code",
2131
+ "execution_count": 170,
2132
+ "metadata": {},
2133
+ "outputs": [
2134
+ {
2135
+ "data": {
2136
+ "text/plain": [
2137
+ "[<matplotlib.lines.Line2D at 0x1d34127ba90>]"
2138
+ ]
2139
+ },
2140
+ "execution_count": 170,
2141
+ "metadata": {},
2142
+ "output_type": "execute_result"
2143
+ }
2144
+ ],
2145
+ "source": [
2146
+ "plt.plot(dd[\"hp_hws_temp\"])\n",
2147
+ "plt.plot(dg[\"hp_hws_temp\"])"
2148
+ ]
2149
+ },
2150
+ {
2151
+ "cell_type": "code",
2152
+ "execution_count": 202,
2153
+ "metadata": {},
2154
+ "outputs": [],
2155
+ "source": [
2156
+ "from sklearn.mixture import GaussianMixture\n",
2156
+ "import numpy as np\n",
2157
+ "import matplotlib.pyplot as plt\n",
2158
+ "\n",
2159
+ "# Cluster the existing feature matrix (processed_data); no random data is generated here\n",
2160
+ "np.random.seed(0)\n",
2161
+ "X = np.asarray(processed_data)  # positional X[:, i] slicing below needs an ndarray\n",
2162
+ "\n",
2163
+ "# Two-component GMM; random_state pins the initialisation so the labels are reproducible\n",
2164
+ "gmm = GaussianMixture(n_components=2, random_state=0)\n",
2165
+ "\n",
2166
+ "# Fitting the model to the data\n",
2167
+ "gmm.fit(X)\n",
2168
+ "\n",
2169
+ "# Assign each row to its most likely mixture component\n",
2170
+ "labels = gmm.predict(X)\n",
2171
+ "\n",
2172
+ "# Scatter the first two feature columns, coloured by assigned component\n",
2173
+ "plt.scatter(X[:, 0], X[:, 1], c=labels, cmap='viridis', alpha=0.5)\n",
2174
+ "plt.title('GMM Clustering')\n",
2175
+ "plt.xlabel('Feature 1')\n",
2176
+ "plt.ylabel('Feature 2')\n",
2177
+ "plt.show()\n"
2179
+ ]
2180
+ },
2181
  {
2182
  "cell_type": "code",
2183
  "execution_count": null,