{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd \n",
    "from datetime import datetime \n",
    "from datetime import timedelta\n",
    "from datetime import date\n",
    "import matplotlib.pyplot as plt\n",
    "# import seaborn as sns\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "from statsmodels.tsa.holtwinters import ExponentialSmoothing\n",
    "\n",
    "import os\n",
    "\n",
    "# Folder that holds the project CSV files. The default is the original,\n",
    "# machine-specific location; set the SMART_BUILDINGS_DATA environment variable\n",
    "# to run the notebook on another machine without editing this cell.\n",
    "dataPATH = os.environ.get(\n",
    "    'SMART_BUILDINGS_DATA',\n",
    "    r\"C:\\Users\\levim\\OneDrive\\Documents\\MastersAI_ES\\TeamProject-5ARIP10\\smart-buildings\\Data\",\n",
    ")\n",
    "\n",
    "### Load ALL data ###\n",
    "# Alternative input file in the same folder: long_merge.csv\n",
    "all_data = pd.read_csv(os.path.join(dataPATH, 'extended_energy_data.csv'))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Load selection of data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Prepare the energy data set with the extended features\n",
    "feature_list = ['date', 'hvac_N', 'hvac_S', 'air_temp_set_1', 'solar_radiation_set_1']\n",
    "\n",
    "# Work on an explicit copy: assigning into a bare column selection of all_data\n",
    "# triggers pandas' SettingWithCopyWarning and may not update the intended frame.\n",
    "extended_energy_data = all_data[feature_list].copy()\n",
    "extended_energy_data['date'] = pd.to_datetime(extended_energy_data['date'])\n",
    "extended_energy_data = extended_energy_data.set_index('date')\n",
    "\n",
    "# eed = extended energy data\n",
    "# Resample to 15-minute and 1-hour means; bins without any sample become NaN\n",
    "eed_15m = extended_energy_data.resample('15T').mean()\n",
    "eed_1h = extended_energy_data.resample('60T').mean()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "vscode": {
     "languageId": "python"
    }
   },
   "outputs": [],
   "source": [
    "# 4-hour trailing moving average of the two HVAC columns; every other column\n",
    "# is carried over unchanged. rolling() with an integer window yields NaN until\n",
    "# a full window of non-missing values is available.\n",
    "window_size = 4*4  # 16 samples of 15 minutes = 4 hours\n",
    "eed_15m_avg = eed_15m.assign(\n",
    "    hvac_N=eed_15m['hvac_N'].rolling(window=window_size).mean(),\n",
    "    hvac_S=eed_15m['hvac_S'].rolling(window=window_size).mean(),\n",
    ")\n",
    "\n",
    "window_size = 4  # 4 samples of 1 hour = 4 hours\n",
    "eed_1h_avg = eed_1h.assign(\n",
    "    hvac_N=eed_1h['hvac_N'].rolling(window=window_size).mean(),\n",
    "    hvac_S=eed_1h['hvac_S'].rolling(window=window_size).mean(),\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%matplotlib qt\n",
    "\n",
    "start_date  = '2018-06-02'\n",
    "end_date    = '2018-06-08'\n",
    "\n",
    "# 15-minute means against the two 4-hour moving averages for the selected dates.\n",
    "# Each line gets a label so the legend tells the curves apart.\n",
    "plt.plot(eed_15m['hvac_N'].loc[start_date:end_date], label='15 min mean')\n",
    "plt.plot(eed_15m_avg['hvac_N'].loc[start_date:end_date], label='15 min, 4 h moving average')\n",
    "plt.plot(eed_1h_avg['hvac_N'].loc[start_date:end_date], label='1 h, 4 h moving average')\n",
    "plt.title('hvac_N: resampled vs. smoothed')\n",
    "plt.xlabel('date')\n",
    "plt.ylabel('hvac_N')\n",
    "plt.legend()\n",
    "plt.xticks(rotation=45)\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%matplotlib qt\n",
    "\n",
    "# Full hourly hvac_S series; NaN stretches show up as breaks in the line.\n",
    "plt.figure(figsize=(20,10))\n",
    "plt.plot(eed_1h['hvac_S'])\n",
    "plt.title('hvac_S, hourly mean')\n",
    "plt.xlabel('date')\n",
    "plt.ylabel('hvac_S')\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Filling data gaps"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def fillgap(firstTS, secondTS, seasonal_periods):\n",
    "    \"\"\"Predict the missing hourly values between two time series segments.\n",
    "\n",
    "    An additive-seasonal Holt-Winters model is fitted to ``firstTS`` and forecast\n",
    "    forward across the gap. A second model is fitted to the time-reversed\n",
    "    ``secondTS`` and forecast backward across the same gap. Both forecasts are\n",
    "    blended linearly: the forward forecast dominates next to ``firstTS`` and the\n",
    "    backward forecast dominates next to ``secondTS``.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    firstTS : pd.Series\n",
    "        Segment before the gap, on an hourly DatetimeIndex.\n",
    "    secondTS : pd.Series\n",
    "        Segment after the gap, on an hourly DatetimeIndex.\n",
    "    seasonal_periods : int\n",
    "        Season length in hourly steps, passed to ``ExponentialSmoothing``.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    pd.Series\n",
    "        One value per missing hour, indexed from ``firstTS.index[-1] + 1h`` to\n",
    "        ``secondTS.index[0] - 1h``.\n",
    "    \"\"\"\n",
    "    #PREPARATION\n",
    "    one = timedelta(hours=1)\n",
    "    # The model only extrapolates forward in time, so the backward pass runs on a\n",
    "    # reversed copy of secondTS placed on a mirrored time axis: the reversed\n",
    "    # secondTS occupies the start of that axis, the reversed firstTS its end.\n",
    "    indexr = pd.date_range(start=firstTS.index[0], end=secondTS.index[-1], freq='H')\n",
    "    secondTSr = secondTS[::-1].copy()\n",
    "    secondTSr.index = indexr[:len(secondTSr)]\n",
    "    mirrored_first_start = indexr[-len(firstTS)]  # where the reversed firstTS begins\n",
    "\n",
    "    #FORWARD\n",
    "    es = ExponentialSmoothing(firstTS, seasonal_periods=seasonal_periods, seasonal='add', freq='H').fit()\n",
    "    forwardPrediction = es.predict(start=firstTS.index[-1]+one, end=secondTS.index[0]-one)\n",
    "\n",
    "    #BACKWARD\n",
    "    es = ExponentialSmoothing(secondTSr, seasonal_periods=seasonal_periods, seasonal='add', freq='H').fit()\n",
    "    backwardPrediction = es.predict(start=secondTSr.index[-1]+one, end=mirrored_first_start-one)\n",
    "\n",
    "    #INTERPOLATION\n",
    "    n_gap = len(forwardPrediction)\n",
    "    forward = forwardPrediction.to_numpy()\n",
    "    # backwardPrediction is ordered on the mirrored axis (its first value belongs\n",
    "    # to the hour right before secondTS), so flip it back to chronological order\n",
    "    # before pairing it with the forward forecast.\n",
    "    backward = backwardPrediction.to_numpy()[::-1]\n",
    "    steps = np.arange(n_gap)\n",
    "    # Weight of the backward forecast grows from 0 to (n_gap - 1) / n_gap across the gap\n",
    "    blended = (backward * steps + forward * (n_gap - steps)) / n_gap\n",
    "\n",
    "    return pd.Series(blended, index=forwardPrediction.index.copy())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Function to split the data into multiple DataFrames based on the gaps\n",
    "def split_dfs(data, max_gap=pd.Timedelta(hours=1)):\n",
    "    \"\"\"Split a time-indexed frame into segments without large time gaps.\n",
    "\n",
    "    Rows containing NaN are dropped first. A new segment starts wherever two\n",
    "    consecutive remaining timestamps are more than ``max_gap`` apart.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    data : pd.DataFrame\n",
    "        Frame with a datetime index (any index name). It is not modified.\n",
    "    max_gap : pd.Timedelta, optional\n",
    "        Largest spacing still treated as contiguous. Defaults to 1 hour.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    list of pd.DataFrame\n",
    "        Segments in order of appearance. Each keeps the original index and\n",
    "        columns plus an integer 'group' column holding the segment number.\n",
    "        The list is empty when no row survives the NaN filter.\n",
    "    \"\"\"\n",
    "    df = data.dropna().copy()\n",
    "\n",
    "    # True at every row that follows a too-large jump; the running sum of these\n",
    "    # flags numbers the segments 0, 1, 2, ...\n",
    "    gaps = df.index.to_series().diff() > max_gap\n",
    "    df['group'] = gaps.cumsum().to_numpy()\n",
    "\n",
    "    # Split the DataFrame into a list of DataFrames based on the groups\n",
    "    return [group for _, group in df.groupby('group')]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def interpolate_gaps(data, col):\n",
    "    \"\"\"Fill every time gap of one column using fillgap.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    data : pd.DataFrame\n",
    "        Time-indexed frame containing ``col``.\n",
    "    col : str\n",
    "        Name of the column to process.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    pd.DataFrame\n",
    "        Single column named ``0`` holding the observed segments with the\n",
    "        interpolated values in between. Empty when ``col`` has no valid rows.\n",
    "    \"\"\"\n",
    "    # Split the data into multiple DataFrames based on the gaps\n",
    "    dfs = split_dfs(data[[col]])\n",
    "    if not dfs:\n",
    "        # Nothing to interpolate; keep the column-0 layout of the normal result\n",
    "        return pd.DataFrame({0: pd.Series(dtype=float)})\n",
    "\n",
    "    # Collect the pieces and concatenate once instead of re-copying a growing\n",
    "    # frame for every gap.\n",
    "    pieces = []\n",
    "    for before, after in zip(dfs[:-1], dfs[1:]):\n",
    "        # Season length: below half of the shorter neighbouring segment, at least 2,\n",
    "        # and capped at one week of hourly data (more is not necessary).\n",
    "        seasonal_periods = min(max(min(len(before), len(after)) // 2 - 10, 2), 24*7)\n",
    "        pieces.append(before[col])\n",
    "        pieces.append(fillgap(before[col], after[col], seasonal_periods))\n",
    "\n",
    "    # Add the last DataFrame\n",
    "    pieces.append(dfs[-1][col])\n",
    "\n",
    "    # The result is exposed as column 0 of a DataFrame\n",
    "    return pd.concat(pieces).to_frame(name=0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Interpolation of the whole data set: gap-fill every column independently\n",
    "# (using rows from 2018-01-02 onward) and line the results up on their timestamps.\n",
    "filled_columns = {}\n",
    "for ii in eed_1h.columns:\n",
    "    # ip_df stays a notebook-level name because the plotting cell below reads it\n",
    "    ip_df = interpolate_gaps(eed_1h['2018-1-2':], ii)\n",
    "    filled_columns[ii] = ip_df[0]\n",
    "\n",
    "# axis=1 for horizontal concat; the dict keys become the column names\n",
    "ip_eed_1h = pd.concat(filled_columns, axis=1)\n",
    "\n",
    "# rename_axis names the index 'date'. set_axis('date', axis=0) would instead try\n",
    "# to replace the index labels with the string 'date' and raise.\n",
    "ip_eed_1h = ip_eed_1h.rename_axis('date', axis=0)\n",
    "ip_eed_1h.to_csv(dataPATH + r\"\\interpolated_energy_data.csv\")\n",
    "\n",
    "ip_eed_1h.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%matplotlib qt\n",
    "# NOTE: ip_df is the variable left over from the last iteration of the\n",
    "# interpolation loop in the previous cell, i.e. the gap-filled result for the\n",
    "# last column of eed_1h. That cell must have been run before this one.\n",
    "# plt.plot(eed_1h['hvac_N'])\n",
    "plt.plot(ip_df)\n",
    "\n",
    "plt.show()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "experiments",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.15"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}