{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd \n",
    "from datetime import datetime \n",
    "from datetime import date\n",
    "import matplotlib.pyplot as plt\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "from keras.models import Sequential\n",
    "from keras.layers import LSTM, Dense\n",
    "from sklearn.model_selection import train_test_split\n",
    "from sklearn.preprocessing import MinMaxScaler,StandardScaler\n",
    "from keras.callbacks import ModelCheckpoint\n",
    "import tensorflow as tf"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[]"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import tensorflow as tf\n",
    "tf.config.list_physical_devices('GPU')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "merged = pd.read_csv(r'../data/long_merge.csv')\n",
    "\n",
    "zone = \"47\"\n",
    "\n",
    "if zone in [\"36\", \"37\", \"38\", \"39\", \"40\", \"41\", \"42\", \"64\", \"65\", \"66\", \"67\", \"68\", \"69\", \"70\"]:\n",
    "    rtu = \"rtu_001\"\n",
    "    wing = \"hvac_N\"\n",
    "elif zone in [\"18\", \"25\", \"26\", \"45\", \"48\", \"55\", \"56\", \"61\"]:\n",
    "    rtu = \"rtu_003\"\n",
    "    wing = \"hvac_S\"\n",
    "elif zone in [\"16\", \"17\", \"21\", \"22\", \"23\", \"24\", \"46\", \"47\", \"51\", \"52\", \"53\", \"54\"]:\n",
    "    rtu = \"rtu_004\"\n",
    "    wing = \"hvac_S\"\n",
    "else:\n",
    "    rtu = \"rtu_002\"\n",
    "    wing = \"hvac_N\"\n",
    "#merged is the dataframe\n",
    "sorted = merged[[\"date\"]+[col for col in merged.columns if zone in col or rtu in col or wing in col]+[\"hp_hws_temp\", \"aru_001_cwr_temp\" , \"aru_001_cws_fr_gpm\" ,\"aru_001_cws_temp\",\"aru_001_hwr_temp\" ,\"aru_001_hws_fr_gpm\" ,\"aru_001_hws_temp\"]]\n",
    "sorted"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>date</th>\n",
       "      <th>hp_hws_temp</th>\n",
       "      <th>rtu_003_sat_sp_tn</th>\n",
       "      <th>rtu_003_fltrd_sa_flow_tn</th>\n",
       "      <th>rtu_003_sa_temp</th>\n",
       "      <th>rtu_003_pa_static_stpt_tn</th>\n",
       "      <th>rtu_003_oa_flow_tn</th>\n",
       "      <th>rtu_003_oadmpr_pct</th>\n",
       "      <th>rtu_003_econ_stpt_tn</th>\n",
       "      <th>rtu_003_ra_temp</th>\n",
       "      <th>...</th>\n",
       "      <th>rtu_003_rf_vfd_spd_fbk_tn</th>\n",
       "      <th>rtu_003_fltrd_gnd_lvl_plenum_press_tn</th>\n",
       "      <th>rtu_003_fltrd_lvl2_plenum_press_tn</th>\n",
       "      <th>wifi_third_south</th>\n",
       "      <th>wifi_fourth_south</th>\n",
       "      <th>air_temp_set_1</th>\n",
       "      <th>air_temp_set_2</th>\n",
       "      <th>dew_point_temperature_set_1d</th>\n",
       "      <th>relative_humidity_set_1</th>\n",
       "      <th>solar_radiation_set_1</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>2018-01-01 00:00:00</td>\n",
       "      <td>75.3</td>\n",
       "      <td>65.0</td>\n",
       "      <td>13558.539</td>\n",
       "      <td>65.5</td>\n",
       "      <td>0.6</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>34.6</td>\n",
       "      <td>65.0</td>\n",
       "      <td>67.9</td>\n",
       "      <td>...</td>\n",
       "      <td>49.9</td>\n",
       "      <td>0.04</td>\n",
       "      <td>0.05</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>11.64</td>\n",
       "      <td>11.51</td>\n",
       "      <td>8.1</td>\n",
       "      <td>79.07</td>\n",
       "      <td>86.7</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2018-01-01 00:01:00</td>\n",
       "      <td>75.3</td>\n",
       "      <td>65.0</td>\n",
       "      <td>13592.909</td>\n",
       "      <td>65.6</td>\n",
       "      <td>0.6</td>\n",
       "      <td>5992.059572</td>\n",
       "      <td>34.6</td>\n",
       "      <td>65.0</td>\n",
       "      <td>67.9</td>\n",
       "      <td>...</td>\n",
       "      <td>49.4</td>\n",
       "      <td>0.04</td>\n",
       "      <td>0.04</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>11.64</td>\n",
       "      <td>11.51</td>\n",
       "      <td>8.1</td>\n",
       "      <td>79.07</td>\n",
       "      <td>86.7</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>2 rows × 23 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                  date  hp_hws_temp  rtu_003_sat_sp_tn  \\\n",
       "0  2018-01-01 00:00:00         75.3               65.0   \n",
       "1  2018-01-01 00:01:00         75.3               65.0   \n",
       "\n",
       "   rtu_003_fltrd_sa_flow_tn  rtu_003_sa_temp  rtu_003_pa_static_stpt_tn  \\\n",
       "0                 13558.539             65.5                        0.6   \n",
       "1                 13592.909             65.6                        0.6   \n",
       "\n",
       "   rtu_003_oa_flow_tn  rtu_003_oadmpr_pct  rtu_003_econ_stpt_tn  \\\n",
       "0            0.000000                34.6                  65.0   \n",
       "1         5992.059572                34.6                  65.0   \n",
       "\n",
       "   rtu_003_ra_temp  ...  rtu_003_rf_vfd_spd_fbk_tn  \\\n",
       "0             67.9  ...                       49.9   \n",
       "1             67.9  ...                       49.4   \n",
       "\n",
       "   rtu_003_fltrd_gnd_lvl_plenum_press_tn  rtu_003_fltrd_lvl2_plenum_press_tn  \\\n",
       "0                                   0.04                                0.05   \n",
       "1                                   0.04                                0.04   \n",
       "\n",
       "   wifi_third_south  wifi_fourth_south  air_temp_set_1  air_temp_set_2  \\\n",
       "0               NaN                NaN           11.64           11.51   \n",
       "1               NaN                NaN           11.64           11.51   \n",
       "\n",
       "   dew_point_temperature_set_1d  relative_humidity_set_1  \\\n",
       "0                           8.1                    79.07   \n",
       "1                           8.1                    79.07   \n",
       "\n",
       "   solar_radiation_set_1  \n",
       "0                   86.7  \n",
       "1                   86.7  \n",
       "\n",
       "[2 rows x 23 columns]"
      ]
     },
     "execution_count": 81,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "rtu = [\"rtu_003\"]\n",
    "# wing = [\"hvac_N\",\"hvac_S\"]\n",
    "env = [\"air_temp_set_1\",\"air_temp_set_2\",\"dew_point_temperature_set_1d\",\"relative_humidity_set_1\",\"solar_radiation_set_1\"]\n",
    "wifi=[\"wifi_third_south\",\"wifi_fourth_south\"]\n",
    "[\"rtu_003_ma_temp\",]\n",
    "# any(sub in col for sub in zone) or\n",
    "energy_data = merged[[\"date\",\"hp_hws_temp\"]+[col for col in merged.columns if \n",
    "                               any(sub in col for sub in rtu) or any(sub in col for sub in wifi)]+env]\n",
    "df_filtered = energy_data[[col for col in energy_data.columns if 'Unnamed' not in col]]\n",
    "df_filtered = df_filtered[[col for col in df_filtered.columns if 'co2' not in col]]\n",
    "df_filtered = df_filtered[[col for col in df_filtered.columns if 'templogger' not in col]]\n",
    "# df_filtered = df_filtered.dropna()\n",
    "df_filtered.head(2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df_filtered['date'] = pd.to_datetime(df_filtered['date'], format = \"%Y-%m-%d %H:%M:%S\")\n",
    "df_filtered = df_filtered[ (df_filtered.date.dt.date >date(2018, 1, 1)) & (df_filtered.date.dt.date< date(2021, 1, 1))]\n",
    "# df_filtered.isna().sum()\n",
    "df_filtered = df_filtered.ffill()\n",
    "df_filtered = df_filtered.bfill()\n",
    "if df_filtered.isna().any().any():\n",
    "    print(\"There are NA values in the DataFrame columns.\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df_filtered = df_filtered.loc[:,['date','hp_hws_temp',\n",
    " 'rtu_003_sa_temp',\n",
    " 'rtu_003_oadmpr_pct',\n",
    " 'rtu_003_ra_temp',\n",
    " 'rtu_003_oa_temp',\n",
    " 'rtu_003_ma_temp',\n",
    " 'rtu_003_sf_vfd_spd_fbk_tn',\n",
    " 'rtu_003_rf_vfd_spd_fbk_tn','wifi_third_south',\n",
    " 'wifi_fourth_south',\n",
    " 'air_temp_set_1',\n",
    " 'air_temp_set_2',\n",
    " 'dew_point_temperature_set_1d',\n",
    " 'relative_humidity_set_1',\n",
    " 'solar_radiation_set_1']]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[]"
      ]
     },
     "execution_count": 98,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "testdataset_df = df_filtered[(df_filtered.date.dt.date >date(2020, 3, 11))]\n",
    "\n",
    "# traindataset_df = df_filtered[ (df_filtered.date.dt.date >date(2019, 11, 8))]\n",
    "\n",
    "traindataset_df = df_filtered[ (df_filtered.date.dt.date <date(2020, 3, 11))]\n",
    "testdataset = testdataset_df.drop(columns=[\"date\"]).values\n",
    "\n",
    "traindataset = traindataset_df.drop(columns=[\"date\"]).values\n",
    "\n",
    "columns_with_na = traindataset_df.columns[traindataset_df.isna().any()].tolist()\n",
    "columns_with_na"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "2072154"
      ]
     },
     "execution_count": 110,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(merged)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(1157787, 909910)"
      ]
     },
     "execution_count": 99,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(traindataset), len(testdataset)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "traindataset = traindataset.astype('float32')\n",
    "testdataset = testdataset.astype('float32')\n",
    "\n",
    "scaler = StandardScaler()\n",
    "traindataset = scaler.fit_transform(traindataset)\n",
    "testdataset = scaler.transform(testdataset)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Epoch 1/5\n",
      "9045/9045 [==============================] - ETA: 0s - loss: 0.0405\n",
      "Epoch 1: val_loss improved from inf to 0.03129, saving model to lstm_smooth_01.tf\n",
      "INFO:tensorflow:Assets written to: lstm_smooth_01.tf\\assets\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "INFO:tensorflow:Assets written to: lstm_smooth_01.tf\\assets\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "9045/9045 [==============================] - 346s 38ms/step - loss: 0.0405 - val_loss: 0.0313\n",
      "Epoch 2/5\n",
      "9045/9045 [==============================] - ETA: 0s - loss: 0.0228\n",
      "Epoch 2: val_loss improved from 0.03129 to 0.02697, saving model to lstm_smooth_01.tf\n",
      "INFO:tensorflow:Assets written to: lstm_smooth_01.tf\\assets\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "INFO:tensorflow:Assets written to: lstm_smooth_01.tf\\assets\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "9045/9045 [==============================] - 500s 55ms/step - loss: 0.0228 - val_loss: 0.0270\n",
      "Epoch 3/5\n",
      "9044/9045 [============================>.] - ETA: 0s - loss: 0.0211\n",
      "Epoch 3: val_loss improved from 0.02697 to 0.02597, saving model to lstm_smooth_01.tf\n",
      "INFO:tensorflow:Assets written to: lstm_smooth_01.tf\\assets\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "INFO:tensorflow:Assets written to: lstm_smooth_01.tf\\assets\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "9045/9045 [==============================] - 389s 43ms/step - loss: 0.0211 - val_loss: 0.0260\n",
      "Epoch 4/5\n",
      "9044/9045 [============================>.] - ETA: 0s - loss: 0.0203\n",
      "Epoch 4: val_loss improved from 0.02597 to 0.02452, saving model to lstm_smooth_01.tf\n",
      "INFO:tensorflow:Assets written to: lstm_smooth_01.tf\\assets\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "INFO:tensorflow:Assets written to: lstm_smooth_01.tf\\assets\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "9045/9045 [==============================] - 433s 48ms/step - loss: 0.0203 - val_loss: 0.0245\n",
      "Epoch 5/5\n",
      "9044/9045 [============================>.] - ETA: 0s - loss: 0.0198\n",
      "Epoch 5: val_loss did not improve from 0.02452\n",
      "9045/9045 [==============================] - 420s 46ms/step - loss: 0.0198 - val_loss: 0.0251\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "<keras.src.callbacks.History at 0x1b4590f0250>"
      ]
     },
     "execution_count": 101,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "train,test = traindataset,testdataset\n",
    "\n",
    "def create_dataset(dataset,time_step):\n",
    "    x = [[] for _ in range(15)] \n",
    "    Y = []\n",
    "    for i in range(len(dataset) - time_step - 1):\n",
    "        for j in range(15):\n",
    "            x[j].append(dataset[i:(i + time_step), j])\n",
    "        Y.append([dataset[i + time_step, 0],dataset[i + time_step, 1],dataset[i + time_step, 2],dataset[i + time_step, 3],dataset[i + time_step, 4],dataset[i + time_step, 5],\n",
    "                  dataset[i + time_step, 6],dataset[i + time_step, 7]])\n",
    "    x= [np.array(feature_list) for feature_list in x]\n",
    "    Y = np.reshape(Y,(len(Y),8))\n",
    "    return np.stack(x,axis=2),Y\n",
    "\n",
    "time_step = 30\n",
    "X_train, y_train = create_dataset(train, time_step)\n",
    "X_test, y_test = create_dataset(test, time_step)\n",
    "\n",
    "\n",
    "model = Sequential()\n",
    "model.add(LSTM(units=50, return_sequences=True, input_shape=(X_train.shape[1], X_train.shape[2])))\n",
    "model.add(LSTM(units=50, return_sequences=True))\n",
    "model.add(LSTM(units=30))\n",
    "model.add(Dense(units=8))\n",
    "\n",
    "model.compile(optimizer='adam', loss='mean_squared_error')\n",
    "\n",
    "checkpoint_path = \"lstm_smooth_01.tf\"\n",
    "checkpoint_callback = ModelCheckpoint(filepath=checkpoint_path, monitor='val_loss', verbose=1, save_best_only=True, mode='min')\n",
    "# model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=5, batch_size=128, verbose=1, callbacks=[checkpoint_callback])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<tensorflow.python.checkpoint.checkpoint.CheckpointLoadStatus at 0x1b41f862c10>"
      ]
     },
     "execution_count": 102,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "model.load_weights(checkpoint_path)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "28434/28434 [==============================] - 168s 6ms/step\n"
     ]
    }
   ],
   "source": [
    "test_predict1 = model.predict(X_test)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 109,
   "metadata": {},
   "outputs": [],
   "source": [
    "%matplotlib qt\n",
    "var = 3\n",
    "plt.plot(y_test[:,var], label='Original Testing Data', color='blue')\n",
    "plt.plot(test_predict1[:,var], label='Predicted Testing Data', color='red',alpha=0.8)\n",
    "anomalies = np.where(abs(test_predict1[:,var] - y_test[:,var]) > 0.38)\n",
    "plt.scatter(anomalies,test_predict1[anomalies,var], color='black',marker =\"o\",s=100 )\n",
    "\n",
    "\n",
    "plt.title('Testing Data - Predicted vs Actual')\n",
    "plt.xlabel('Time')\n",
    "plt.ylabel('Value')\n",
    "plt.legend()\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 105,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.mixture import GaussianMixture\n",
    "import numpy as np\n",
    "import matplotlib.pyplot as plt\n",
    "from sklearn.decomposition import PCA\n",
    "\n",
    "# Generating random data for demonstration\n",
    "np.random.seed(0)\n",
    "X =  test_predict1 - y_test\n",
    "\n",
    "\n",
    "pca = PCA(n_components=2)\n",
    "X = pca.fit_transform(X)\n",
    "\n",
    "\n",
    "# Creating the GMM instance with desired number of clusters\n",
    "gmm = GaussianMixture(n_components=2)\n",
    "\n",
    "# Fitting the model to the data\n",
    "gmm.fit(X)\n",
    "\n",
    "# Getting the cluster labels\n",
    "labels = gmm.predict(X)\n",
    "\n",
    "# Plotting the data points with colors representing different clusters\n",
    "plt.scatter(X[:, 0], X[:, 1], c=labels, cmap='viridis', alpha=0.5)\n",
    "plt.title('GMM Clustering')\n",
    "plt.xlabel('Feature 1')\n",
    "plt.ylabel('Feature 2')\n",
    "plt.show()\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 106,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.cluster import KMeans\n",
    "import numpy as np\n",
    "import matplotlib.pyplot as plt\n",
    "# Generating random data for demonstration\n",
    "np.random.seed(0)\n",
    "X = (test_predict1 - y_test) * scaler.var_[0:8] + scaler.mean_[0:8]\n",
    "\n",
    "k = 6\n",
    "\n",
    "kmeans = KMeans(n_clusters=k)\n",
    "\n",
    "kmeans.fit(X)\n",
    "\n",
    "\n",
    "pca = PCA(n_components=2)\n",
    "X = pca.fit_transform(X)\n",
    "\n",
    "\n",
    "\n",
    "# Getting the cluster centers and labels\n",
    "centroids = kmeans.cluster_centers_\n",
    "centroids = pca.transform(centroids)\n",
    "labels = kmeans.labels_\n",
    "\n",
    "# Plotting the data points and cluster centers\n",
    "plt.scatter(X[:, 0], X[:, 1], c=labels, cmap='viridis', alpha=0.5)\n",
    "plt.scatter(centroids[:, 0], centroids[:, 1], marker='x', c='red', s=200, linewidths=2)\n",
    "plt.title('KMeans Clustering')\n",
    "plt.xlabel('Feature 1')\n",
    "plt.ylabel('Feature 2')\n",
    "plt.show()\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 111,
   "metadata": {},
   "outputs": [],
   "source": [
    "k = 60\n",
    "X= test_predict1 - y_test\n",
    "processed_data = []\n",
    "feat_df = pd.DataFrame(columns=[\"mean\",\"std\",])\n",
    "for i in range(0,len(X), 60):\n",
    "    mean = X[i:i+k].mean(axis = 0)\n",
    "    std = X[i:i+k].std(axis = 0)\n",
    "    max = X[i:i+k].max(axis = 0)\n",
    "    min = X[i:i+k].min(axis = 0)\n",
    "    iqr = np.percentile(X[i:i+k], 75, axis=0) - np.percentile(X[i:i+k], 25,axis=0)\n",
    "    data = np.concatenate([mean, std, max, min, iqr])\n",
    "    processed_data.append([data])\n",
    "processed_data = np.concatenate(processed_data,axis=0) "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 117,
   "metadata": {},
   "outputs": [],
   "source": [
    "X = processed_data\n",
    "\n",
    "kmeans = KMeans(n_clusters=3, algorithm='elkan', max_iter=1000, n_init = 5)\n",
    "\n",
    "kmeans.fit(X)\n",
    "\n",
    "pca = PCA(n_components=2)\n",
    "X = pca.fit_transform(X)\n",
    "\n",
    "\n",
    "# Getting the cluster centers and labels\n",
    "centroids = kmeans.cluster_centers_\n",
    "centroids = pca.transform(centroids)\n",
    "labels = kmeans.labels_\n",
    "\n",
    "# Plotting the data points and cluster centers\n",
    "plt.scatter(X[:, 0], X[:, 1], c=labels, cmap='viridis', alpha=0.5)\n",
    "plt.scatter(centroids[:, 0], centroids[:, 1], marker='x', c='red', s=200, linewidths=2)\n",
    "plt.title('KMeans Clustering')\n",
    "plt.xlabel('Feature 1')\n",
    "plt.ylabel('Feature 2')\n",
    "plt.show()\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.mixture import GaussianMixture\n",
    "import numpy as np\n",
    "import matplotlib.pyplot as plt\n",
    "from sklearn.decomposition import PCA\n",
    "\n",
    "# Generating random data for demonstration\n",
    "np.random.seed(0)\n",
    "X = processed_data\n",
    "\n",
    "# Creating the GMM instance with desired number of clusters\n",
    "gmm = GaussianMixture(n_components=3, init_params='k-means++')\n",
    "\n",
    "# Fitting the model to the data\n",
    "gmm.fit(X)\n",
    "labels = gmm.predict(X)\n",
    "\n",
    "\n",
    "pca = PCA(n_components=2)\n",
    "X = pca.fit_transform(X)\n",
    "\n",
    "\n",
    "# Getting the cluster labels\n",
    "\n",
    "# Plotting the data points with colors representing different clusters\n",
    "plt.scatter(X[:, 0], X[:, 1], c=labels, cmap='viridis', alpha=0.5)\n",
    "plt.title('GMM Clustering')\n",
    "plt.xlabel('Feature 1')\n",
    "plt.ylabel('Feature 2')\n",
    "plt.show()\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 116,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.cluster import KMeans\n",
    "import numpy as np\n",
    "import matplotlib.pyplot as plt\n",
    "# Generating random data for demonstration\n",
    "np.random.seed(0)\n",
    "X = (test_predict1 * scaler.var_[0:8] + scaler.mean_[0:8]) - (y_test * scaler.var_[0:8] + scaler.mean_[0:8])\n",
    "k = 6\n",
    "\n",
    "kmeans = KMeans(n_clusters=k)\n",
    "\n",
    "kmeans.fit(X)\n",
    "\n",
    "\n",
    "pca = PCA(n_components=2)\n",
    "X = pca.fit_transform(X)\n",
    "\n",
    "\n",
    "\n",
    "# Getting the cluster centers and labels\n",
    "centroids = kmeans.cluster_centers_\n",
    "centroids = pca.transform(centroids)\n",
    "labels = kmeans.labels_\n",
    "\n",
    "# Plotting the data points and cluster centers\n",
    "plt.scatter(X[:, 0], X[:, 1], c=labels, cmap='viridis', alpha=0.5)\n",
    "plt.scatter(centroids[:, 0], centroids[:, 1], marker='x', c='red', s=200, linewidths=2)\n",
    "plt.title('KMeans Clustering')\n",
    "plt.xlabel('Feature 1')\n",
    "plt.ylabel('Feature 2')\n",
    "plt.show()\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "tensorflow",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.8"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}