from datetime import date, datetime, timedelta

import matplotlib.pyplot as plt
# import seaborn as sns
import numpy as np
import pandas as pd
from statsmodels.tsa.holtwinters import ExponentialSmoothing

# Machine-specific data directory. Kept as a plain string because other cells
# build file names from it with string concatenation.
dataPATH = r"C:\Users\levim\OneDrive\Documents\MastersAI_ES\TeamProject-5ARIP10\smart-buildings\Data"

### Load ALL data ###
# all_data = pd.read_csv(dataPATH + r"\long_merge.csv")
all_data = pd.read_csv(dataPATH + r"\extended_energy_data.csv")

### Load selection of data ###
# Prepare the energy data set with extended features.
feature_list = ['date', 'hvac_N', 'hvac_S', 'air_temp_set_1', 'solar_radiation_set_1']

# Build the selection in one chain so that a fresh frame is produced; assigning
# into `all_data[feature_list]` directly would be a write on a slice
# (SettingWithCopyWarning) and may or may not stick.
extended_energy_data = (
    all_data[feature_list]
    .assign(date=lambda frame: pd.to_datetime(frame['date']))
    .set_index('date')
)

# eed = extended energy data
# Resampling back to 15 minutes and 1 hour ('min' is accepted by old and new
# pandas alike, unlike the deprecated 'T' alias).
eed_15m = extended_energy_data.resample('15min').mean()
eed_1h = extended_energy_data.resample('60min').mean()

### Moving average of the HVAC columns ###
# Both resolutions are smoothed over the same 4-hour span, so the window is
# expressed in samples of the respective resolution.
hvac_columns = ['hvac_N', 'hvac_S']

window_size = 4 * 4  # 16 samples of 15 minutes = 4 hours
eed_15m_avg = eed_15m.copy()
eed_15m_avg[hvac_columns] = eed_15m[hvac_columns].rolling(window=window_size).mean()

window_size = 4  # 4 samples of 1 hour = 4 hours
eed_1h_avg = eed_1h.copy()
eed_1h_avg[hvac_columns] = eed_1h[hvac_columns].rolling(window=window_size).mean()

### Visual check of the smoothing ###
# Script form of the `%matplotlib qt` cell magic (only available inside IPython).
get_ipython().run_line_magic('matplotlib', 'qt')

start_date = '2018-06-02'
end_date = '2018-06-08'

plt.plot(eed_15m['hvac_N'].loc[start_date:end_date], label='hvac_N, 15 min')
plt.plot(eed_15m_avg['hvac_N'].loc[start_date:end_date], label='hvac_N, 15 min, 4 h rolling mean')
plt.plot(eed_1h_avg['hvac_N'].loc[start_date:end_date], label='hvac_N, 1 h, 4 h rolling mean')
plt.xticks(rotation=45)
plt.legend()
plt.show()

# Full hourly hvac_S series; the gaps visible here are what the gap-filling
# code further down is meant to close.
plt.figure(figsize=(20, 10))
plt.plot(eed_1h['hvac_S'], label='hvac_S, 1 h')
plt.legend()
plt.show()
def fillgap(firstTS, secondTS, seasonal_periods):
    """Fill the hourly gap between two series with a blended Holt-Winters forecast.

    An additive-seasonal ``ExponentialSmoothing`` model is fitted on ``firstTS``
    and forecast forward across the gap. A second model is fitted on the
    time-reversed ``secondTS`` and forecast "backward" across the same gap.
    The two forecasts are blended linearly: the forward forecast dominates
    next to ``firstTS`` and the backward forecast dominates next to
    ``secondTS``.

    Parameters
    ----------
    firstTS : pandas.Series
        Segment before the gap, indexed by timestamps in hourly steps
        (the gap boundaries are computed with a one-hour offset).
    secondTS : pandas.Series
        Segment after the gap, same kind of index.
    seasonal_periods : int
        Passed through to ``ExponentialSmoothing(seasonal_periods=...)``.

    Returns
    -------
    pandas.Series
        One value per missing hour, indexed from one hour after the end of
        ``firstTS`` to one hour before the start of ``secondTS``.
    """
    # PREPARATION
    # An offset object is used instead of the 'H' string alias so the same code
    # runs on pandas versions that deprecated/removed the upper-case alias.
    hour = pd.offsets.Hour()
    timeline = pd.date_range(start=firstTS.index[0], end=secondTS.index[-1], freq=hour)

    # Mirror the second segment in time: its values are reversed and re-labelled
    # with the *start* of the timeline, so that "forecasting" from it walks
    # into the gap from the right-hand side.
    secondTSr = secondTS[::-1].copy()
    secondTSr.index = timeline[:len(secondTSr)]
    # On the mirrored timeline the first segment occupies the last len(firstTS) slots.
    mirrored_first_start = timeline[-len(firstTS)]

    # FORWARD
    forward_fit = ExponentialSmoothing(
        firstTS, seasonal_periods=seasonal_periods, seasonal='add', freq=hour
    ).fit()
    forwardPrediction = forward_fit.predict(
        start=firstTS.index[-1] + hour, end=secondTS.index[0] - hour
    )

    # BACKWARD
    backward_fit = ExponentialSmoothing(
        secondTSr, seasonal_periods=seasonal_periods, seasonal='add', freq=hour
    ).fit()
    backwardPrediction = backward_fit.predict(
        start=secondTSr.index[-1] + hour, end=mirrored_first_start - hour
    )

    # INTERPOLATION
    # Work on plain arrays: integer keys on a datetime-indexed Series are
    # label lookups in newer pandas, not positions.
    forward_values = forwardPrediction.to_numpy()
    # The backward forecast comes out in mirrored time (its first value is the
    # hour right before secondTS), so flip it back to chronological order
    # before pairing it with the forward forecast.
    backward_values = backwardPrediction.to_numpy()[::-1]

    gap_length = len(forward_values)
    step = np.arange(gap_length)
    blended = (backward_values * step + forward_values * (gap_length - step)) / gap_length

    return pd.Series(blended, index=forwardPrediction.index.copy())
def split_dfs(data, max_gap=pd.Timedelta(hours=1)):
    """Split a time-indexed frame into contiguous segments.

    Rows containing NaN are dropped first. A new segment starts wherever the
    step between two consecutive remaining index values is larger than
    ``max_gap``.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame indexed by timestamps. The index name is preserved; it does not
        have to be called ``'date'``.
    max_gap : pandas.Timedelta, optional
        Largest step still considered contiguous (default: one hour).

    Returns
    -------
    list of pandas.DataFrame
        One frame per segment, in index order. Each frame carries an extra
        integer ``'group'`` column holding the segment number. The list is
        empty when no valid rows remain.
    """
    valid = data.dropna()

    # The first difference is NaT, which compares False, so the first row never
    # opens a new group on its own.
    is_new_segment = valid.index.to_series().diff() > max_gap

    # The running count of gap markers is the segment id of every row.
    valid = valid.assign(group=is_new_segment.cumsum().to_numpy())

    return [segment for _, segment in valid.groupby('group')]


def interpolate_gaps(data, col, max_seasonal_periods=24 * 7):
    """Fill every gap of ``data[col]`` using ``fillgap``.

    The column is split into contiguous segments with ``split_dfs``; each pair
    of neighbouring segments is handed to ``fillgap`` and the segments and
    fills are stitched back together in order.

    Parameters
    ----------
    data : pandas.DataFrame
        Time-indexed frame containing ``col``.
    col : str
        Name of the column to interpolate.
    max_seasonal_periods : int, optional
        Upper bound for the seasonal period handed to ``fillgap``
        (default: one week of hourly samples).

    Returns
    -------
    pandas.DataFrame
        Single-column frame whose column label is the integer ``0``, holding
        the original values and the fills. Empty (but with column ``0``) when
        ``data[col]`` has no valid rows.
    """
    segments = [segment[col] for segment in split_dfs(data[[col]])]
    if not segments:
        return pd.DataFrame(columns=[0], dtype=float)

    pieces = []
    for before, after in zip(segments, segments[1:]):
        # Derive the seasonal period from the shorter neighbour, never below 2
        # and never above the configured cap.
        seasonal_periods = max(min(len(before), len(after)) // 2 - 10, 2)
        seasonal_periods = min(seasonal_periods, max_seasonal_periods)

        pieces.append(before)
        pieces.append(fillgap(before, after, seasonal_periods))

    # The last segment has no gap after it.
    pieces.append(segments[-1])

    # Concatenate once and name the column explicitly, so the result shape does
    # not depend on how pandas mixes Series with an empty DataFrame.
    return pd.concat(pieces).to_frame(name=0)
# Interpolation of the whole hourly data set: every column is gap-filled on its
# own and the results are assembled side by side.
eed_1h_recent = eed_1h.loc['2018-1-2':]

ip_columns = {}
for ii in eed_1h.columns:
    # interpolate_gaps returns a single-column frame whose column label is 0.
    ip_df = interpolate_gaps(eed_1h_recent, ii)
    ip_columns[ii] = ip_df[0]

# One concat at the end; the dict keys become the column names.
ip_eed_1h = pd.concat(ip_columns, axis=1)

# Name the index 'date'. (set_axis would try to *replace* the index labels
# with the given value, which is not what is wanted here.)
ip_eed_1h = ip_eed_1h.rename_axis('date', axis=0)
ip_eed_1h.to_csv(dataPATH + r"\interpolated_energy_data.csv")

ip_eed_1h.head()

# Script form of the `%matplotlib qt` cell magic (only available inside IPython).
get_ipython().run_line_magic('matplotlib', 'qt')
# plt.plot(eed_1h['hvac_N'])
# NOTE: `ip_df` is whatever the loop above processed last, i.e. the
# interpolation of the final column of eed_1h.
plt.plot(ip_df)

plt.show()