levimohle committed on
Commit
6a833f6
1 Parent(s): af5e293

Added moving averaged energy data

Browse files
EnergyLSTM/EDA_lstm_energy.ipynb CHANGED
@@ -0,0 +1,155 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 1,
6
+ "metadata": {},
7
+ "outputs": [
8
+ {
9
+ "name": "stderr",
10
+ "output_type": "stream",
11
+ "text": [
12
+ "c:\\Users\\levim\\anaconda3\\envs\\experiments\\lib\\site-packages\\scipy\\__init__.py:146: UserWarning: A NumPy version >=1.16.5 and <1.23.0 is required for this version of SciPy (detected version 1.24.3\n",
13
+ " warnings.warn(f\"A NumPy version >={np_minversion} and <{np_maxversion}\"\n"
14
+ ]
15
+ }
16
+ ],
17
+ "source": [
18
+ "import pandas as pd \n",
19
+ "from datetime import datetime \n",
20
+ "from datetime import date\n",
21
+ "import matplotlib.pyplot as plt\n",
22
+ "# import seaborn as sns\n",
23
+ "import numpy as np\n",
24
+ "import pandas as pd\n",
25
+ "from keras.models import Sequential\n",
26
+ "from keras.layers import LSTM, Dense\n",
27
+ "from sklearn.model_selection import train_test_split\n",
28
+ "from sklearn.preprocessing import MinMaxScaler,StandardScaler\n",
29
+ "from keras.callbacks import ModelCheckpoint\n",
30
+ "\n",
31
+ "dataPATH = r\"C:\\Users\\levim\\OneDrive\\Documents\\MastersAI_ES\\TeamProject-5ARIP10\\smart-buildings\\Data\"\n",
32
+ "\n",
33
+ "### Load ALL data ###\n",
34
+ "all_data = pd.read_csv(dataPATH + r\"\\long_merge.csv\")"
35
+ ]
36
+ },
37
+ {
38
+ "cell_type": "markdown",
39
+ "metadata": {},
40
+ "source": [
41
+ "### Load selection of data"
42
+ ]
43
+ },
44
+ {
45
+ "cell_type": "code",
46
+ "execution_count": 20,
47
+ "metadata": {},
48
+ "outputs": [
49
+ {
50
+ "name": "stderr",
51
+ "output_type": "stream",
52
+ "text": [
53
+ "C:\\Users\\levim\\AppData\\Local\\Temp\\ipykernel_27084\\3547628995.py:5: SettingWithCopyWarning: \n",
54
+ "A value is trying to be set on a copy of a slice from a DataFrame.\n",
55
+ "Try using .loc[row_indexer,col_indexer] = value instead\n",
56
+ "\n",
57
+ "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
58
+ " extended_energy_data['date'] = pd.to_datetime(extended_energy_data['date'])\n"
59
+ ]
60
+ }
61
+ ],
62
+ "source": [
63
+ "# Prepare energy data set with extended features\n",
64
+ "feature_list = ['date', 'hvac_N', 'hvac_S', 'air_temp_set_1', 'solar_radiation_set_1']\n",
65
+ "extended_energy_data = all_data[feature_list]\n",
66
+ "\n",
67
+ "extended_energy_data['date'] = pd.to_datetime(extended_energy_data['date'])\n",
68
+ "extended_energy_data.set_index('date', inplace=True)\n",
69
+ "\n",
70
+ "# eed = extended energy data\n",
71
+ "# Resampling back to 15 minutes and 1 hour\n",
72
+ "eed_15m = extended_energy_data.resample('15T').mean()\n",
73
+ "eed_1h = extended_energy_data.resample('60T').mean()"
74
+ ]
75
+ },
76
+ {
77
+ "cell_type": "code",
78
+ "execution_count": 27,
79
+ "metadata": {
80
+ "vscode": {
81
+ "languageId": "ruby"
82
+ }
83
+ },
84
+ "outputs": [],
85
+ "source": [
86
+ "# Apply a moving average with a half-day window (48 samples at 15-minute resolution) to the 'hvac_N' and 'hvac_S' columns\n",
87
+ "window_size = 12*4 # Half a day\n",
88
+ "eed_15m_avg = eed_15m.copy()\n",
89
+ "eed_15m_avg['hvac_N'] = eed_15m['hvac_N'].rolling(window=window_size).mean()\n",
90
+ "eed_15m_avg['hvac_S'] = eed_15m['hvac_S'].rolling(window=window_size).mean()"
91
+ ]
92
+ },
93
+ {
94
+ "cell_type": "code",
95
+ "execution_count": 26,
96
+ "metadata": {},
97
+ "outputs": [
98
+ {
99
+ "data": {
100
+ "text/plain": [
101
+ "(array([17591., 17652., 17713., 17775., 17836., 17897.]),\n",
102
+ " [Text(17591.0, 0, '2018-03'),\n",
103
+ " Text(17652.0, 0, '2018-05'),\n",
104
+ " Text(17713.0, 0, '2018-07'),\n",
105
+ " Text(17775.0, 0, '2018-09'),\n",
106
+ " Text(17836.0, 0, '2018-11'),\n",
107
+ " Text(17897.0, 0, '2019-01')])"
108
+ ]
109
+ },
110
+ "execution_count": 26,
111
+ "metadata": {},
112
+ "output_type": "execute_result"
113
+ }
114
+ ],
115
+ "source": [
116
+ "%matplotlib qt\n",
117
+ "\n",
118
+ "start_date = '2018-02-02'\n",
119
+ "end_date = '2019-02-03'\n",
120
+ "\n",
121
+ "plt.plot(eed_15m['hvac_N'].loc[start_date:end_date])\n",
122
+ "plt.plot(eed_15m['moving_average'].loc[start_date:end_date])\n",
123
+ "plt.xticks(rotation=45)"
124
+ ]
125
+ },
126
+ {
127
+ "cell_type": "code",
128
+ "execution_count": null,
129
+ "metadata": {},
130
+ "outputs": [],
131
+ "source": []
132
+ }
133
+ ],
134
+ "metadata": {
135
+ "kernelspec": {
136
+ "display_name": "experiments",
137
+ "language": "python",
138
+ "name": "python3"
139
+ },
140
+ "language_info": {
141
+ "codemirror_mode": {
142
+ "name": "ipython",
143
+ "version": 3
144
+ },
145
+ "file_extension": ".py",
146
+ "mimetype": "text/x-python",
147
+ "name": "python",
148
+ "nbconvert_exporter": "python",
149
+ "pygments_lexer": "ipython3",
150
+ "version": "3.8.15"
151
+ }
152
+ },
153
+ "nbformat": 4,
154
+ "nbformat_minor": 2
155
+ }
EnergyLSTM/lstm_energy.ipynb CHANGED
@@ -2,9 +2,18 @@
2
  "cells": [
3
  {
4
  "cell_type": "code",
5
- "execution_count": 8,
6
  "metadata": {},
7
- "outputs": [],
 
 
 
 
 
 
 
 
 
8
  "source": [
9
  "import pandas as pd \n",
10
  "from datetime import datetime \n",
@@ -25,113 +34,19 @@
25
  },
26
  {
27
  "cell_type": "code",
28
- "execution_count": 16,
29
  "metadata": {},
30
  "outputs": [
31
  {
32
- "name": "stderr",
33
- "output_type": "stream",
34
- "text": [
35
- "C:\\Users\\levim\\AppData\\Local\\Temp\\ipykernel_12184\\1569659483.py:5: SettingWithCopyWarning: \n",
36
- "A value is trying to be set on a copy of a slice from a DataFrame.\n",
37
- "Try using .loc[row_indexer,col_indexer] = value instead\n",
38
- "\n",
39
- "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
40
- " extended_energy_data['date'] = pd.to_datetime(extended_energy_data['date'])\n"
41
  ]
42
- },
43
- {
44
- "data": {
45
- "text/html": [
46
- "<div>\n",
47
- "<style scoped>\n",
48
- " .dataframe tbody tr th:only-of-type {\n",
49
- " vertical-align: middle;\n",
50
- " }\n",
51
- "\n",
52
- " .dataframe tbody tr th {\n",
53
- " vertical-align: top;\n",
54
- " }\n",
55
- "\n",
56
- " .dataframe thead th {\n",
57
- " text-align: right;\n",
58
- " }\n",
59
- "</style>\n",
60
- "<table border=\"1\" class=\"dataframe\">\n",
61
- " <thead>\n",
62
- " <tr style=\"text-align: right;\">\n",
63
- " <th></th>\n",
64
- " <th>date</th>\n",
65
- " <th>hvac_N</th>\n",
66
- " <th>hvac_S</th>\n",
67
- " <th>air_temp_set_1</th>\n",
68
- " <th>solar_radiation_set_1</th>\n",
69
- " </tr>\n",
70
- " </thead>\n",
71
- " <tbody>\n",
72
- " <tr>\n",
73
- " <th>0</th>\n",
74
- " <td>2018-01-01 00:00:00</td>\n",
75
- " <td>NaN</td>\n",
76
- " <td>NaN</td>\n",
77
- " <td>11.5400</td>\n",
78
- " <td>51.4075</td>\n",
79
- " </tr>\n",
80
- " <tr>\n",
81
- " <th>1</th>\n",
82
- " <td>2018-01-01 01:00:00</td>\n",
83
- " <td>37.525001</td>\n",
84
- " <td>19.395</td>\n",
85
- " <td>10.8900</td>\n",
86
- " <td>2.1250</td>\n",
87
- " </tr>\n",
88
- " <tr>\n",
89
- " <th>2</th>\n",
90
- " <td>2018-01-01 02:00:00</td>\n",
91
- " <td>37.750001</td>\n",
92
- " <td>22.775</td>\n",
93
- " <td>10.7550</td>\n",
94
- " <td>0.0000</td>\n",
95
- " </tr>\n",
96
- " <tr>\n",
97
- " <th>3</th>\n",
98
- " <td>2018-01-01 03:00:00</td>\n",
99
- " <td>37.550001</td>\n",
100
- " <td>18.920</td>\n",
101
- " <td>10.4775</td>\n",
102
- " <td>0.0000</td>\n",
103
- " </tr>\n",
104
- " <tr>\n",
105
- " <th>4</th>\n",
106
- " <td>2018-01-01 04:00:00</td>\n",
107
- " <td>36.675001</td>\n",
108
- " <td>21.600</td>\n",
109
- " <td>9.9925</td>\n",
110
- " <td>0.0000</td>\n",
111
- " </tr>\n",
112
- " </tbody>\n",
113
- "</table>\n",
114
- "</div>"
115
- ],
116
- "text/plain": [
117
- " date hvac_N hvac_S air_temp_set_1 \\\n",
118
- "0 2018-01-01 00:00:00 NaN NaN 11.5400 \n",
119
- "1 2018-01-01 01:00:00 37.525001 19.395 10.8900 \n",
120
- "2 2018-01-01 02:00:00 37.750001 22.775 10.7550 \n",
121
- "3 2018-01-01 03:00:00 37.550001 18.920 10.4775 \n",
122
- "4 2018-01-01 04:00:00 36.675001 21.600 9.9925 \n",
123
- "\n",
124
- " solar_radiation_set_1 \n",
125
- "0 51.4075 \n",
126
- "1 2.1250 \n",
127
- "2 0.0000 \n",
128
- "3 0.0000 \n",
129
- "4 0.0000 "
130
- ]
131
- },
132
- "execution_count": 16,
133
- "metadata": {},
134
- "output_type": "execute_result"
135
  }
136
  ],
137
  "source": [
@@ -142,11 +57,15 @@
142
  "extended_energy_data['date'] = pd.to_datetime(extended_energy_data['date'])\n",
143
  "extended_energy_data.set_index('date', inplace=True)\n",
144
  "\n",
145
- "extended_energy_data = extended_energy_data.resample('60T').mean()\n",
146
- "# extended_energy_data = extended_energy_data.interpolate(method='linear')\n",
 
 
 
 
 
147
  "\n",
148
- "extended_energy_data = extended_energy_data.reset_index(drop=False)\n",
149
- "extended_energy_data.head()"
150
  ]
151
  },
152
  {
@@ -156,7 +75,8 @@
156
  "outputs": [],
157
  "source": [
158
  "# energy_data = pd.read_csv(dataPATH + r\"\\hvac_data_1h.csv\")\n",
159
- "energy_data = extended_energy_data\n",
 
160
  "\n",
161
  "# Convert the date column to datetime\n",
162
  "energy_data['date'] = pd.to_datetime(energy_data['date'], format = \"%Y-%m-%d %H:%M:%S\")\n",
@@ -261,7 +181,7 @@
261
  "source": [
262
  "train,test = traindataset,testdataset\n",
263
  "steps_in_past = 3 \n",
264
- "time_step = 24\n",
265
  "no_inputs = 5\n",
266
  "no_outputs = 2\n",
267
  "def create_dataset(dataset,time_step):\n",
 
2
  "cells": [
3
  {
4
  "cell_type": "code",
5
+ "execution_count": 2,
6
  "metadata": {},
7
+ "outputs": [
8
+ {
9
+ "name": "stderr",
10
+ "output_type": "stream",
11
+ "text": [
12
+ "c:\\Users\\levim\\anaconda3\\envs\\experiments\\lib\\site-packages\\scipy\\__init__.py:146: UserWarning: A NumPy version >=1.16.5 and <1.23.0 is required for this version of SciPy (detected version 1.24.3\n",
13
+ " warnings.warn(f\"A NumPy version >={np_minversion} and <{np_maxversion}\"\n"
14
+ ]
15
+ }
16
+ ],
17
  "source": [
18
  "import pandas as pd \n",
19
  "from datetime import datetime \n",
 
34
  },
35
  {
36
  "cell_type": "code",
37
+ "execution_count": null,
38
  "metadata": {},
39
  "outputs": [
40
  {
41
+ "ename": "NameError",
42
+ "evalue": "name 'all_data' is not defined",
43
+ "output_type": "error",
44
+ "traceback": [
45
+ "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
46
+ "\u001b[1;31mNameError\u001b[0m Traceback (most recent call last)",
47
+ "Cell \u001b[1;32mIn[1], line 3\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[38;5;66;03m# Prepar energy data set with extended features\u001b[39;00m\n\u001b[0;32m 2\u001b[0m feature_list \u001b[38;5;241m=\u001b[39m [\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mdate\u001b[39m\u001b[38;5;124m'\u001b[39m, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mhvac_N\u001b[39m\u001b[38;5;124m'\u001b[39m, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mhvac_S\u001b[39m\u001b[38;5;124m'\u001b[39m, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mair_temp_set_1\u001b[39m\u001b[38;5;124m'\u001b[39m, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124msolar_radiation_set_1\u001b[39m\u001b[38;5;124m'\u001b[39m]\n\u001b[1;32m----> 3\u001b[0m extended_energy_data \u001b[38;5;241m=\u001b[39m \u001b[43mall_data\u001b[49m[feature_list]\n\u001b[0;32m 5\u001b[0m extended_energy_data[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mdate\u001b[39m\u001b[38;5;124m'\u001b[39m] \u001b[38;5;241m=\u001b[39m pd\u001b[38;5;241m.\u001b[39mto_datetime(extended_energy_data[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mdate\u001b[39m\u001b[38;5;124m'\u001b[39m])\n\u001b[0;32m 6\u001b[0m extended_energy_data\u001b[38;5;241m.\u001b[39mset_index(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mdate\u001b[39m\u001b[38;5;124m'\u001b[39m, inplace\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m)\n",
48
+ "\u001b[1;31mNameError\u001b[0m: name 'all_data' is not defined"
 
49
  ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
50
  }
51
  ],
52
  "source": [
 
57
  "extended_energy_data['date'] = pd.to_datetime(extended_energy_data['date'])\n",
58
  "extended_energy_data.set_index('date', inplace=True)\n",
59
  "\n",
60
+ "eed_15m = extended_energy_data.resample('15T').mean()\n",
61
+ "eed_15m = eed_15m.reset_index(drop=False)\n",
62
+ "\n",
63
+ "window_size = 12*4 # Half a day\n",
64
+ "eed_15m_avg = eed_15m.copy()\n",
65
+ "eed_15m_avg['hvac_N'] = eed_15m['hvac_N'].rolling(window=window_size).mean()\n",
66
+ "eed_15m_avg['hvac_S'] = eed_15m['hvac_S'].rolling(window=window_size).mean()\n",
67
  "\n",
68
+ "eed_15m.head()"
 
69
  ]
70
  },
71
  {
 
75
  "outputs": [],
76
  "source": [
77
  "# energy_data = pd.read_csv(dataPATH + r\"\\hvac_data_1h.csv\")\n",
78
+ "# energy_data = eed_15m\n",
79
+ "energy_data = eed_15m_avg\n",
80
  "\n",
81
  "# Convert the date column to datetime\n",
82
  "energy_data['date'] = pd.to_datetime(energy_data['date'], format = \"%Y-%m-%d %H:%M:%S\")\n",
 
181
  "source": [
182
  "train,test = traindataset,testdataset\n",
183
  "steps_in_past = 3 \n",
184
+ "time_step = 4*8\n",
185
  "no_inputs = 5\n",
186
  "no_outputs = 2\n",
187
  "def create_dataset(dataset,time_step):\n",
EnergyLSTM/lstm_energy_01.keras CHANGED
Binary files a/EnergyLSTM/lstm_energy_01.keras and b/EnergyLSTM/lstm_energy_01.keras differ