tgd1115 committed
Commit 8474315 · verified · 1 Parent(s): 8b1f062

Upload 12 files

Browse files
.streamlit/config.toml ADDED
@@ -0,0 +1,6 @@
+ [theme]
+ primaryColor = "#FFCC00"              # Taxi yellow
+ backgroundColor = "#F0F0F0"           # Light gray resembling city streets
+ secondaryBackgroundColor = "#FFFFFF"  # White for clean sidebar contrast
+ textColor = "#333333"                 # Dark gray for readability
+ font = "sans serif"                   # Modern, clean font style
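Streamlit reads `.streamlit/config.toml` automatically when the app is launched from the repository root, so the theme requires no code changes. A quick sanity check (a minimal sketch using Streamlit's public `st.get_option` accessor; not part of this commit):

```python
import streamlit as st

# Prints "#FFCC00" when the theme file above is being picked up.
st.write("Primary color:", st.get_option("theme.primaryColor"))
```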
src/app.py CHANGED
@@ -1,17 +1,36 @@
  import streamlit as st
  import pandas as pd
- import numpy as np
  import plotly.express as px
  import plotly.graph_objs as go
  from sklearn.preprocessing import StandardScaler
- from pyod.models.iforest import IForest
- from datetime import datetime, timedelta


  class NYCTaxiAnomalyDetector:
      def __init__(self, data):
          self.data = data.copy()
          self.scaler = StandardScaler()

      def filter_by_date_range(self, start_date, end_date):
          """
@@ -41,15 +60,16 @@ class NYCTaxiAnomalyDetector:
          :return: Scaled data and original index
          """
          # Ensure the column is numeric
-         data[column] = pd.to_numeric(data[column], errors="coerce")

          # Remove NaN values
          clean_data = data[column].dropna()

          # Scale the data
          scaled_data = self.scaler.fit_transform(clean_data.values.reshape(-1, 1))

-         return scaled_data, clean_data.index

      def detect_anomalies(self, data, column, contamination=0.05):
          """
@@ -60,77 +80,176 @@ class NYCTaxiAnomalyDetector:
          :param contamination: Expected proportion of outliers
          :return: DataFrame with anomaly detection results
          """
          # Preprocess data
-         scaled_data, original_index = self.preprocess_data(data, column)

-         # Apply Isolation Forest
-         clf = IForest(contamination=contamination, random_state=42)
-         y_pred = clf.fit_predict(scaled_data)

          # Create results DataFrame
          anomaly_results = pd.DataFrame(
              {
-                 "date": original_index,
-                 "value": data.loc[original_index, column],
-                 "is_anomaly": y_pred == 1,
              }
          )

          return anomaly_results

-
  class AIContextGenerator:
      def generate_context(self, anomaly_date):
          """
-         Generate potential context for the anomaly

          :param anomaly_date: Date of the anomaly
-         :return: List of contextual insights
          """
-         # Mock contextual insights - replace with actual data sources
-         contexts = [
-             {
-                 "type": "Weather",
-                 "description": f"Weather conditions on {anomaly_date.date()}",
-                 "severity": "High",
-             },
-             {
-                 "type": "Event",
-                 "description": f"City events around {anomaly_date.date()}",
-                 "severity": "Medium",
-             },
-             {
-                 "type": "Economic",
-                 "description": f"Economic factors on {anomaly_date.date()}",
-                 "severity": "Low",
-             },
-         ]
-         return contexts


- def load_nyc_taxi_data():
      """
-     Load and preprocess NYC Taxi dataset

-     :return: DataFrame with synthetic taxi traffic data
      """
-     # Synthetic data generation
-     dates = pd.date_range(start="2023-01-01", end="2023-12-31", freq="D")
-     base_traffic = np.random.normal(5000, 500, len(dates))

-     # Introduce some anomalies
-     base_traffic[50] = 10000  # Extreme spike
-     base_traffic[200] = 500  # Extreme drop
-     base_traffic[300] = 12000  # Another spike

-     df = pd.DataFrame({"date": dates, "daily_traffic": base_traffic})

      return df


  def main():
      st.set_page_config(
-         page_title="NYC Taxi Traffic Anomaly Detection", page_icon="🚕", layout="wide"
      )

      st.title("🚕 NYC Taxi Traffic Anomaly Detection")
@@ -180,8 +299,16 @@ def main():
          filtered_data, "daily_traffic", contamination=anomaly_threshold
      )

      # Visualization
-     st.header("Daily Taxi Traffic Trend")
      fig = px.line(
          filtered_data,
          x="date",
@@ -191,7 +318,6 @@ def main():
      )

      # Highlight Anomalies
-     anomaly_points = filtered_data[anomalies["is_anomaly"]]
      fig.add_trace(
          go.Scatter(
              x=anomaly_points["date"],
@@ -205,29 +331,73 @@ def main():
      st.plotly_chart(fig, use_container_width=True)

      # Anomaly Details
-     st.header("Anomaly Insights")

-     if not anomaly_points.empty:
          context_generator = AIContextGenerator()

-         for _, anomaly in anomaly_points.iterrows():
-             st.subheader(f"Anomaly on {anomaly['date'].date()}")

              col1, col2 = st.columns(2)

              with col1:
-                 st.metric("Taxi Rides", f"{anomaly['daily_traffic']:.0f}")

              with col2:
                  contexts = context_generator.generate_context(anomaly["date"])
-                 st.write("### Potential Context")
-                 for context in contexts:
-                     st.markdown(
-                         f"""
-                         - **{context['type']}**: {context['description']}
-                         (Severity: {context['severity']})
-                         """
-                     )
      else:
          st.info("No significant anomalies detected with current settings.")

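The deletions above retire the pyod `IForest` detector, whose `fit_predict` returns 1 for outliers (hence the removed `y_pred == 1` check), along with the synthetic data generator. For reference, a roughly equivalent stand-in with scikit-learn's `IsolationForest` — a sketch, not part of this commit; note that scikit-learn flags outliers with -1 rather than 1 — would look like:

```python
import numpy as np
from sklearn.ensemble import IsolationForest

rng = np.random.default_rng(42)
values = rng.normal(5000, 500, size=365)  # synthetic daily traffic, as in the removed loader
values[50] = 10000                        # planted spike

clf = IsolationForest(contamination=0.05, random_state=42)
labels = clf.fit_predict(values.reshape(-1, 1))
is_anomaly = labels == -1  # scikit-learn marks outliers with -1; pyod used 1
print(int(is_anomaly.sum()), "points flagged")
```

The updated `src/app.py` replaces this with the Transformer-based detector shown below.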
  import streamlit as st
  import pandas as pd
  import plotly.express as px
  import plotly.graph_objs as go
+ import numpy as np
  from sklearn.preprocessing import StandardScaler
+ from dataclasses import dataclass
+ from datetime import datetime
+ import torch
+ import torch.nn as nn
+ import os
+ from torch.utils.data import DataLoader, TensorDataset
+ from path_config import MODEL_DIR
+ from pipeline import Transformer


+ @dataclass()
  class NYCTaxiAnomalyDetector:
      def __init__(self, data):
          self.data = data.copy()
          self.scaler = StandardScaler()
+         self.model = None
+         self.TRANSFORMER_S_MODEL_PATH = os.path.join(
+             MODEL_DIR, "transformer_model_small.pth"
+         )
+
+     def create_sequences(self, data, seq_length=24):
+         """Create sequences for the transformer model"""
+         sequences = []
+         values = data.reshape(-1, 1)
+         for i in range(len(values) - seq_length + 1):
+             sequences.append(values[i : i + seq_length])
+         return np.array(sequences)

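`create_sequences` turns a 1-D series into overlapping windows of length 24, so `n` input points yield `n - 24 + 1` sequences of shape `(24, 1)`. A standalone shape check (a sketch, assuming nothing beyond NumPy):

```python
import numpy as np

values = np.arange(30, dtype=float).reshape(-1, 1)  # a scaled series of 30 points
seq_length = 24
windows = np.array(
    [values[i : i + seq_length] for i in range(len(values) - seq_length + 1)]
)
print(windows.shape)  # (7, 24, 1): 30 - 24 + 1 = 7 overlapping sequences
```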
      def filter_by_date_range(self, start_date, end_date):
          """
@@ -41,15 +60,16 @@ class NYCTaxiAnomalyDetector:
          :return: Scaled data and original index
          """
          # Ensure the column is numeric
+         data.loc[:, column] = pd.to_numeric(data[column], errors="coerce")

          # Remove NaN values
          clean_data = data[column].dropna()

          # Scale the data
          scaled_data = self.scaler.fit_transform(clean_data.values.reshape(-1, 1))
+         sequences = self.create_sequences(scaled_data)

+         return sequences, clean_data.index[23:]  # align each window with its last timestamp (seq_length - 1 = 23)

      def detect_anomalies(self, data, column, contamination=0.05):
          """
@@ -60,77 +80,176 @@ class NYCTaxiAnomalyDetector:
          :param contamination: Expected proportion of outliers
          :return: DataFrame with anomaly detection results
          """
+         if self.model is None:
+             self.model = Transformer()
+             self.model.load_state_dict(
+                 torch.load(self.TRANSFORMER_S_MODEL_PATH, weights_only=True)
+             )
+             self.model.eval()
+
          # Preprocess data
+         sequences, original_index = self.preprocess_data(data, column)
+
+         # Create DataLoader
+         dataset = TensorDataset(torch.FloatTensor(sequences))
+         test_loader = DataLoader(dataset, batch_size=32, shuffle=False)

+         # Calculate threshold percentile from contamination
+         threshold_percentile = (1 - contamination) * 100
+
+         # Detect anomalies
+         reconstruction_errors, predictions, anomalies, optimal_threshold = (
+             self.detect_anomalies_batch(
+                 self.model, test_loader, threshold_percentile=threshold_percentile
+             )
+         )

          # Create results DataFrame
          anomaly_results = pd.DataFrame(
              {
+                 "date": data.loc[original_index, "date"],
+                 column: data.loc[original_index, column],
+                 "is_anomaly": anomalies,
+                 "reconstruction_error": reconstruction_errors,
+                 "prediction": predictions,
              }
          )

          return anomaly_results

+     def detect_anomalies_batch(self, model, test_loader, threshold_percentile=99.7):
+         """Detect anomalies in batches"""
+         reconstruction_errors = []
+         predictions = []
+
+         with torch.no_grad():
+             for seq_true in test_loader:
+                 x = seq_true[0]  # TensorDataset yields 1-tuples; unwrap the batch
+                 pred = model(x)
+                 # Calculate reconstruction error for each sequence
+                 errors = torch.mean(
+                     torch.abs(pred - x), dim=(1, 2)
+                 )  # Mean over sequence length and features
+                 reconstruction_errors.extend(errors.cpu().numpy())
+                 predictions.extend(
+                     pred[:, -1, 0].cpu().numpy()
+                 )  # Take last timestep prediction
+
+         reconstruction_errors = np.array(reconstruction_errors)
+         predictions = np.array(predictions)
+         optimal_threshold = np.percentile(reconstruction_errors, threshold_percentile)
+         anomalies = (reconstruction_errors > optimal_threshold).astype(int)
+
+         return reconstruction_errors, predictions, anomalies, optimal_threshold
+
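`detect_anomalies_batch` flags the top `contamination` fraction of reconstruction errors: a contamination of 0.05 maps to a 95th-percentile cutoff, and the default `threshold_percentile=99.7` mirrors the classic three-sigma rule. The thresholding step in isolation (a sketch over synthetic errors):

```python
import numpy as np

errors = np.random.default_rng(0).exponential(scale=1.0, size=1000)
contamination = 0.05
threshold = np.percentile(errors, (1 - contamination) * 100)  # 95th percentile
flagged = (errors > threshold).astype(int)
print(f"threshold={threshold:.3f}, flagged={flagged.sum()}")  # roughly 50 of 1000 points
```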
+ @dataclass()
  class AIContextGenerator:
+     predefined_anomalies = {
+         datetime(2014, 11, 2).date(): [
+             {
+                 "type": "NYC Marathon",
+                 "description": "2014 New York City Marathon",
+                 "reference": None,
+             }
+         ],
+         datetime(2014, 11, 27).date(): [
+             {
+                 "type": "Thanksgiving Day",
+                 "description": "Thanksgiving Day",
+                 "reference": None,
+             }
+         ],
+         datetime(2014, 12, 25).date(): [
+             {
+                 "type": "Christmas",
+                 "description": "Christmas",
+                 "reference": {
+                     "text": "NYC Marathon 2014",
+                     "url": "https://en.wikipedia.org/wiki/2014_New_York_City_Marathon",
+                 },
+             }
+         ],
+         datetime(2015, 1, 1).date(): [
+             {
+                 "type": "Event",
+                 "description": "New Year's Day",
+                 "reference": {
+                     "text": "NYC Marathon 2014",
+                     "url": "https://en.wikipedia.org/wiki/2014_New_York_City_Marathon",
+                 },
+             }
+         ],
+         datetime(2015, 1, 26).date(): [
+             {
+                 "type": "Event",
+                 "description": "Snowstorm",
+                 "reference": {
+                     "text": "NYC Marathon 2014",
+                     "url": "https://en.wikipedia.org/wiki/2014_New_York_City_Marathon",
+                 },
+             }
+         ],
+         datetime(2015, 1, 27).date(): [
+             {
+                 "type": "Event",
+                 "description": "Snowstorm",
+                 "reference": {
+                     "text": "NYC Marathon 2014",
+                     "url": "https://en.wikipedia.org/wiki/2014_New_York_City_Marathon",
+                 },
+             }
+         ],
+         datetime(2014, 7, 1).date(): [
+             {
+                 "type": "Event",
+                 "description": "Testing",
+                 "reference": {
+                     "text": "NYC Marathon 2014",
+                     "url": "https://en.wikipedia.org/wiki/2014_New_York_City_Marathon",
+                 },
+             }
+         ],
+     }
+
      def generate_context(self, anomaly_date):
          """
+         Generate potential context for the anomaly if predefined

          :param anomaly_date: Date of the anomaly
+         :return: List of contextual insights if available, else None
          """
+         if isinstance(anomaly_date, pd.Timestamp):
+             anomaly_date = anomaly_date.date()
+
+         return self.predefined_anomalies.get(anomaly_date, None)

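Lookups key on plain `datetime.date` objects, which is why `generate_context` first strips a `pd.Timestamp` down to its date. Usage (a sketch):

```python
import pandas as pd

gen = AIContextGenerator()
print(gen.generate_context(pd.Timestamp("2014-11-27")))  # [{'type': 'Thanksgiving Day', ...}]
print(gen.generate_context(pd.Timestamp("2014-06-01")))  # None: no known event on this date
```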
+ def load_nyc_taxi_data(file_path="data/nyc_taxi_traffic_data.csv"):
      """
+     Load and preprocess NYC Taxi dataset from a CSV file.

+     :return: DataFrame with taxi traffic data
      """
+     # Load the CSV file
+     df = pd.read_csv(file_path)

+     # Ensure timestamp column is datetime and rename columns for consistency
+     df["timestamp"] = pd.to_datetime(df["timestamp"])
+     df.rename(columns={"timestamp": "date", "value": "daily_traffic"}, inplace=True)

+     # Sort by date to ensure proper time-series ordering
+     df = df.sort_values(by="date").reset_index(drop=True)

      return df

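`load_nyc_taxi_data` expects a NAB-style layout with `timestamp` and `value` columns, which it renames to `date` and `daily_traffic`. A sketch of the assumed file shape (the sample rows are illustrative):

```python
# data/nyc_taxi_traffic_data.csv (assumed layout):
#   timestamp,value
#   2014-07-01 00:00:00,10844
#   2014-07-01 00:30:00,8127
df = load_nyc_taxi_data()
print(df.columns.tolist())  # ['date', 'daily_traffic']
```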
  def main():
      st.set_page_config(
+         page_title="NYC Taxi Traffic Anomaly Detection",
+         page_icon="🚕",
+         layout="wide",
+         initial_sidebar_state="expanded",
      )

      st.title("🚕 NYC Taxi Traffic Anomaly Detection")
@@ -180,8 +299,16 @@ def main():
          filtered_data, "daily_traffic", contamination=anomaly_threshold
      )

+     # Get anomaly points for visualization
+     anomaly_points = anomalies[anomalies["is_anomaly"] == 1]
+
+     # Filter true anomalies based on predefined anomalies
+     true_anomaly_points = anomaly_points[
+         anomaly_points["date"].dt.date.isin(AIContextGenerator.predefined_anomalies)
+     ]
+
      # Visualization
+     st.header("Daily Taxi Traffic Trend")
      fig = px.line(
          filtered_data,
          x="date",
@@ -191,7 +318,6 @@ def main():
      )

      # Highlight Anomalies
      fig.add_trace(
          go.Scatter(
              x=anomaly_points["date"],
@@ -205,29 +331,73 @@ def main():
      st.plotly_chart(fig, use_container_width=True)

      # Anomaly Details
+     st.header("Insights of Anomalies with Known Events 📈")

+     # Calculate metrics using the anomalies DataFrame
+     total_anomalies_detected = len(anomaly_points)
+     true_anomalies = len(true_anomaly_points)
+     false_anomalies = total_anomalies_detected - true_anomalies
+
+     st.sidebar.subheader("Summary")
+     st.sidebar.metric("Total Anomalies Detected:", total_anomalies_detected)
+     st.sidebar.metric("Anomalies with Known Events:", true_anomalies)
+     st.sidebar.metric("Unexplained Anomalies:", false_anomalies)
+
+     if not true_anomaly_points.empty:
          context_generator = AIContextGenerator()

+         # Group by date and calculate min/max traffic
+         grouped_anomalies = (
+             true_anomaly_points.groupby(true_anomaly_points["date"].dt.date)
+             .agg({"daily_traffic": ["min", "max"]})
+             .reset_index()
+         )
+
+         # Flatten the multi-level columns
+         grouped_anomalies.columns = ["date", "min_traffic", "max_traffic"]
+
+         for _, anomaly in grouped_anomalies.iterrows():

              col1, col2 = st.columns(2)

              with col1:
+                 st.subheader(f"Anomaly on {anomaly['date']}")
+                 traffic_range = (
+                     f"{anomaly['min_traffic']:.0f}-{anomaly['max_traffic']:.0f}"
+                 )
+                 st.metric("Taxi Rides Range", traffic_range)

              with col2:
                  contexts = context_generator.generate_context(anomaly["date"])
+                 if contexts:
+                     for context in contexts:
+                         st.subheader(f"Event: {context['type']}")
+
+                         reference_text = (
+                             context["reference"]["text"]
+                             if context["reference"]
+                             else "-"
+                         )
+                         reference_url = (
+                             context["reference"]["url"]
+                             if context["reference"] and context["reference"]["url"]
+                             else ""
+                         )
+
+                         url = (
+                             f"[{reference_text}]({reference_url})"
+                             if reference_url
+                             else reference_text
+                         )
+
+                         st.markdown(
+                             f"""
+                             - Description: {context['description']}
+                             - Reference: {url}
+                             """
+                         )
+                 else:
+                     st.write("No significant event available for this anomaly.")
      else:
          st.info("No significant anomalies detected with current settings.")

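Putting the pieces together, a minimal driver for the new detector (a sketch: it assumes the CSV and the `transformer_model_small.pth` checkpoint referenced above are both present):

```python
df = load_nyc_taxi_data()
detector = NYCTaxiAnomalyDetector(df)
results = detector.detect_anomalies(df, "daily_traffic", contamination=0.05)
print(results[results["is_anomaly"] == 1].head())  # dates with the largest reconstruction errors
```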
src/config/config.py ADDED
@@ -0,0 +1,10 @@
+ import os
+ import logging
+ from path_config import LOG_DIR
+
+ def setup_logging():
+     logging.basicConfig(
+         filename=os.path.join(LOG_DIR, 'train.log'),
+         level=logging.INFO,
+         format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
+     )
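Callers only need to invoke `setup_logging()` once at startup; afterwards any module-level logger writes to `train.log` under `LOG_DIR`. Usage (a sketch, assuming `LOG_DIR` already exists on disk):

```python
import logging
from src.config.config import setup_logging

setup_logging()
logging.getLogger(__name__).info("This line ends up in LOG_DIR/train.log")
```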
src/inference.py ADDED
@@ -0,0 +1,25 @@
+ from src.config.config import setup_logging
+ from src.pipeline import NYCDataLoader, VanillaLSTM, Transformer, VAE, AnomalyDetector
+
+
+ def inference():
+
+     seq_length = 48
+
+     setup_logging()
+
+     # Load the preprocessed data
+     data_loader = NYCDataLoader(batch_size=32)
+     train_loader, _, test_loader = data_loader.load_data()
+
+     # Get the true anomalies
+     true_anomalies = data_loader.get_true_anomalies()
+
+     # Initialize the AnomalyDetector
+     detector = AnomalyDetector()
+
+     # Load the trained models
+     detector.load_data(test_loader=test_loader)
+     detector.load_trained_model("transformer_model.pth", model_type="transformer")
+
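`inference()` is defined but nothing in the file invokes it. If the script is meant to be run directly, a standard entry-point guard (not present in the commit) would be:

```python
if __name__ == "__main__":
    inference()
```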
src/logs/train.log ADDED
@@ -0,0 +1,300 @@
+ 2025-01-06 14:24:18,937 - pipeline.preprocesser - INFO - Loading raw data...
+ 2025-01-06 14:24:19,021 - pipeline.preprocesser - INFO - Detecting anomalies...
+ 2025-01-06 14:24:19,021 - pipeline.preprocesser - INFO - Labeling anomalies...
+ 2025-01-06 14:25:05,900 - pipeline.preprocesser - INFO - Loading raw data...
+ 2025-01-06 14:25:05,939 - pipeline.preprocesser - INFO - Detecting anomalies...
+ 2025-01-06 14:25:05,939 - pipeline.preprocesser - INFO - Labeling anomalies...
+ 2025-01-06 14:25:05,952 - pipeline.preprocesser - INFO - Splitting the data into training, validation and testing set...
+ 2025-01-06 14:25:05,954 - pipeline.preprocesser - INFO - Splitting the data into features and target...
+ 2025-01-06 14:25:05,956 - pipeline.preprocesser - INFO - Scaling the data...
+ 2025-01-06 14:25:05,962 - pipeline.preprocesser - INFO - Creating sliding window with the length of 48 from the data...
+ 2025-01-06 14:25:05,970 - pipeline.preprocesser - INFO - Saving the preprocessed data...
+ 2025-01-06 14:25:05,973 - pipeline.preprocesser - INFO - C:\Users\LENOVO\PycharmProjects\NeuroOrion_Time_Series_Anomaly_Detection\data\preprocessed_data\train_features.npy has been saved successfully!
+ 2025-01-06 14:25:05,974 - pipeline.preprocesser - INFO - C:\Users\LENOVO\PycharmProjects\NeuroOrion_Time_Series_Anomaly_Detection\data\preprocessed_data\val_features.npy has been saved successfully!
+ 2025-01-06 14:25:05,975 - pipeline.preprocesser - INFO - C:\Users\LENOVO\PycharmProjects\NeuroOrion_Time_Series_Anomaly_Detection\data\preprocessed_data\test_features.npy has been saved successfully!
+ 2025-01-06 14:25:05,976 - pipeline.preprocesser - INFO - C:\Users\LENOVO\PycharmProjects\NeuroOrion_Time_Series_Anomaly_Detection\data\preprocessed_data\train_labels.npy has been saved successfully!
+ 2025-01-06 14:25:05,977 - pipeline.preprocesser - INFO - C:\Users\LENOVO\PycharmProjects\NeuroOrion_Time_Series_Anomaly_Detection\data\preprocessed_data\val_labels.npy has been saved successfully!
+ 2025-01-06 14:25:05,978 - pipeline.preprocesser - INFO - C:\Users\LENOVO\PycharmProjects\NeuroOrion_Time_Series_Anomaly_Detection\data\preprocessed_data\test_labels.npy has been saved successfully!
+ 2025-01-06 14:30:56,989 - pipeline.preprocesser - INFO - Loading raw data...
+ 2025-01-06 14:30:57,016 - pipeline.preprocesser - INFO - Detecting anomalies...
+ 2025-01-06 14:30:57,016 - pipeline.preprocesser - INFO - Labeling anomalies...
+ 2025-01-06 14:30:57,026 - pipeline.preprocesser - INFO - Splitting the data into training, validation and testing set...
+ 2025-01-06 14:30:57,027 - pipeline.preprocesser - INFO - Splitting the data into features and target...
+ 2025-01-06 14:30:57,028 - pipeline.preprocesser - INFO - Scaling the data...
+ 2025-01-06 14:30:57,031 - pipeline.preprocesser - INFO - Creating sliding window with the length of 48 from the data...
+ 2025-01-06 14:30:57,038 - pipeline.preprocesser - INFO - Saving the preprocessed data...
+ 2025-01-06 14:30:57,040 - pipeline.preprocesser - INFO - C:\Users\LENOVO\PycharmProjects\NeuroOrion_Time_Series_Anomaly_Detection\data\preprocessed_data\train_features.npy has been saved successfully!
+ 2025-01-06 14:30:57,041 - pipeline.preprocesser - INFO - C:\Users\LENOVO\PycharmProjects\NeuroOrion_Time_Series_Anomaly_Detection\data\preprocessed_data\val_features.npy has been saved successfully!
+ 2025-01-06 14:30:57,042 - pipeline.preprocesser - INFO - C:\Users\LENOVO\PycharmProjects\NeuroOrion_Time_Series_Anomaly_Detection\data\preprocessed_data\test_features.npy has been saved successfully!
+ 2025-01-06 14:30:57,043 - pipeline.preprocesser - INFO - C:\Users\LENOVO\PycharmProjects\NeuroOrion_Time_Series_Anomaly_Detection\data\preprocessed_data\train_labels.npy has been saved successfully!
+ 2025-01-06 14:30:57,043 - pipeline.preprocesser - INFO - C:\Users\LENOVO\PycharmProjects\NeuroOrion_Time_Series_Anomaly_Detection\data\preprocessed_data\val_labels.npy has been saved successfully!
+ 2025-01-06 14:30:57,044 - pipeline.preprocesser - INFO - C:\Users\LENOVO\PycharmProjects\NeuroOrion_Time_Series_Anomaly_Detection\data\preprocessed_data\test_labels.npy has been saved successfully!
+ 2025-01-06 14:30:57,122 - pipeline.trainer - INFO - Initialize the model...
+ 2025-01-06 14:31:42,480 - pipeline.preprocesser - INFO - Loading raw data...
+ 2025-01-06 14:31:42,498 - pipeline.preprocesser - INFO - Detecting anomalies...
+ 2025-01-06 14:31:42,498 - pipeline.preprocesser - INFO - Labeling anomalies...
+ 2025-01-06 14:31:42,506 - pipeline.preprocesser - INFO - Splitting the data into training, validation and testing set...
+ 2025-01-06 14:31:42,508 - pipeline.preprocesser - INFO - Splitting the data into features and target...
+ 2025-01-06 14:31:42,509 - pipeline.preprocesser - INFO - Scaling the data...
+ 2025-01-06 14:31:42,512 - pipeline.preprocesser - INFO - Creating sliding window with the length of 48 from the data...
+ 2025-01-06 14:31:42,518 - pipeline.preprocesser - INFO - Saving the preprocessed data...
+ 2025-01-06 14:31:42,519 - pipeline.preprocesser - INFO - C:\Users\LENOVO\PycharmProjects\NeuroOrion_Time_Series_Anomaly_Detection\data\preprocessed_data\train_features.npy has been saved successfully!
+ 2025-01-06 14:31:42,521 - pipeline.preprocesser - INFO - C:\Users\LENOVO\PycharmProjects\NeuroOrion_Time_Series_Anomaly_Detection\data\preprocessed_data\val_features.npy has been saved successfully!
+ 2025-01-06 14:31:42,523 - pipeline.preprocesser - INFO - C:\Users\LENOVO\PycharmProjects\NeuroOrion_Time_Series_Anomaly_Detection\data\preprocessed_data\test_features.npy has been saved successfully!
+ 2025-01-06 14:31:42,524 - pipeline.preprocesser - INFO - C:\Users\LENOVO\PycharmProjects\NeuroOrion_Time_Series_Anomaly_Detection\data\preprocessed_data\train_labels.npy has been saved successfully!
+ 2025-01-06 14:31:42,525 - pipeline.preprocesser - INFO - C:\Users\LENOVO\PycharmProjects\NeuroOrion_Time_Series_Anomaly_Detection\data\preprocessed_data\val_labels.npy has been saved successfully!
+ 2025-01-06 14:31:42,525 - pipeline.preprocesser - INFO - C:\Users\LENOVO\PycharmProjects\NeuroOrion_Time_Series_Anomaly_Detection\data\preprocessed_data\test_labels.npy has been saved successfully!
+ 2025-01-06 14:31:42,584 - pipeline.trainer - INFO - Initialize the model...
+ 2025-01-06 14:32:10,258 - pipeline.preprocesser - INFO - Loading raw data...
+ 2025-01-06 14:32:10,275 - pipeline.preprocesser - INFO - Detecting anomalies...
+ 2025-01-06 14:32:10,275 - pipeline.preprocesser - INFO - Labeling anomalies...
+ 2025-01-06 14:32:10,284 - pipeline.preprocesser - INFO - Splitting the data into training, validation and testing set...
+ 2025-01-06 14:32:10,285 - pipeline.preprocesser - INFO - Splitting the data into features and target...
+ 2025-01-06 14:32:10,286 - pipeline.preprocesser - INFO - Scaling the data...
+ 2025-01-06 14:32:10,291 - pipeline.preprocesser - INFO - Creating sliding window with the length of 48 from the data...
+ 2025-01-06 14:32:10,297 - pipeline.preprocesser - INFO - Saving the preprocessed data...
+ 2025-01-06 14:32:10,307 - pipeline.preprocesser - INFO - C:\Users\LENOVO\PycharmProjects\NeuroOrion_Time_Series_Anomaly_Detection\data\preprocessed_data\train_features.npy has been saved successfully!
+ 2025-01-06 14:32:10,310 - pipeline.preprocesser - INFO - C:\Users\LENOVO\PycharmProjects\NeuroOrion_Time_Series_Anomaly_Detection\data\preprocessed_data\val_features.npy has been saved successfully!
+ 2025-01-06 14:32:10,314 - pipeline.preprocesser - INFO - C:\Users\LENOVO\PycharmProjects\NeuroOrion_Time_Series_Anomaly_Detection\data\preprocessed_data\test_features.npy has been saved successfully!
+ 2025-01-06 14:32:10,316 - pipeline.preprocesser - INFO - C:\Users\LENOVO\PycharmProjects\NeuroOrion_Time_Series_Anomaly_Detection\data\preprocessed_data\train_labels.npy has been saved successfully!
+ 2025-01-06 14:32:10,318 - pipeline.preprocesser - INFO - C:\Users\LENOVO\PycharmProjects\NeuroOrion_Time_Series_Anomaly_Detection\data\preprocessed_data\val_labels.npy has been saved successfully!
+ 2025-01-06 14:32:10,319 - pipeline.preprocesser - INFO - C:\Users\LENOVO\PycharmProjects\NeuroOrion_Time_Series_Anomaly_Detection\data\preprocessed_data\test_labels.npy has been saved successfully!
+ 2025-01-06 14:32:10,393 - pipeline.trainer - INFO - Initialize the model...
+ 2025-01-06 14:33:33,454 - pipeline.preprocesser - INFO - Loading raw data...
+ 2025-01-06 14:33:33,473 - pipeline.preprocesser - INFO - Detecting anomalies...
+ 2025-01-06 14:33:33,473 - pipeline.preprocesser - INFO - Labeling anomalies...
+ 2025-01-06 14:33:33,482 - pipeline.preprocesser - INFO - Splitting the data into training, validation and testing set...
+ 2025-01-06 14:33:33,482 - pipeline.preprocesser - INFO - Splitting the data into features and target...
+ 2025-01-06 14:33:33,483 - pipeline.preprocesser - INFO - Scaling the data...
+ 2025-01-06 14:33:33,488 - pipeline.preprocesser - INFO - Creating sliding window with the length of 48 from the data...
+ 2025-01-06 14:33:33,494 - pipeline.preprocesser - INFO - Saving the preprocessed data...
+ 2025-01-06 14:33:33,498 - pipeline.preprocesser - INFO - C:\Users\LENOVO\PycharmProjects\NeuroOrion_Time_Series_Anomaly_Detection\data\preprocessed_data\train_features.npy has been saved successfully!
+ 2025-01-06 14:33:33,499 - pipeline.preprocesser - INFO - C:\Users\LENOVO\PycharmProjects\NeuroOrion_Time_Series_Anomaly_Detection\data\preprocessed_data\val_features.npy has been saved successfully!
+ 2025-01-06 14:33:33,501 - pipeline.preprocesser - INFO - C:\Users\LENOVO\PycharmProjects\NeuroOrion_Time_Series_Anomaly_Detection\data\preprocessed_data\test_features.npy has been saved successfully!
+ 2025-01-06 14:33:33,501 - pipeline.preprocesser - INFO - C:\Users\LENOVO\PycharmProjects\NeuroOrion_Time_Series_Anomaly_Detection\data\preprocessed_data\train_labels.npy has been saved successfully!
+ 2025-01-06 14:33:33,502 - pipeline.preprocesser - INFO - C:\Users\LENOVO\PycharmProjects\NeuroOrion_Time_Series_Anomaly_Detection\data\preprocessed_data\val_labels.npy has been saved successfully!
+ 2025-01-06 14:33:33,502 - pipeline.preprocesser - INFO - C:\Users\LENOVO\PycharmProjects\NeuroOrion_Time_Series_Anomaly_Detection\data\preprocessed_data\test_labels.npy has been saved successfully!
+ 2025-01-06 14:33:53,220 - pipeline.preprocesser - INFO - Loading raw data...
+ 2025-01-06 14:33:53,240 - pipeline.preprocesser - INFO - Detecting anomalies...
+ 2025-01-06 14:33:53,240 - pipeline.preprocesser - INFO - Labeling anomalies...
+ 2025-01-06 14:33:53,247 - pipeline.preprocesser - INFO - Splitting the data into training, validation and testing set...
+ 2025-01-06 14:33:53,248 - pipeline.preprocesser - INFO - Splitting the data into features and target...
+ 2025-01-06 14:33:53,249 - pipeline.preprocesser - INFO - Scaling the data...
+ 2025-01-06 14:33:53,253 - pipeline.preprocesser - INFO - Creating sliding window with the length of 48 from the data...
+ 2025-01-06 14:33:53,260 - pipeline.preprocesser - INFO - Saving the preprocessed data...
+ 2025-01-06 14:33:53,261 - pipeline.preprocesser - INFO - C:\Users\LENOVO\PycharmProjects\NeuroOrion_Time_Series_Anomaly_Detection\data\preprocessed_data\train_features.npy has been saved successfully!
+ 2025-01-06 14:33:53,263 - pipeline.preprocesser - INFO - C:\Users\LENOVO\PycharmProjects\NeuroOrion_Time_Series_Anomaly_Detection\data\preprocessed_data\val_features.npy has been saved successfully!
+ 2025-01-06 14:33:53,264 - pipeline.preprocesser - INFO - C:\Users\LENOVO\PycharmProjects\NeuroOrion_Time_Series_Anomaly_Detection\data\preprocessed_data\test_features.npy has been saved successfully!
+ 2025-01-06 14:33:53,265 - pipeline.preprocesser - INFO - C:\Users\LENOVO\PycharmProjects\NeuroOrion_Time_Series_Anomaly_Detection\data\preprocessed_data\train_labels.npy has been saved successfully!
+ 2025-01-06 14:33:53,266 - pipeline.preprocesser - INFO - C:\Users\LENOVO\PycharmProjects\NeuroOrion_Time_Series_Anomaly_Detection\data\preprocessed_data\val_labels.npy has been saved successfully!
+ 2025-01-06 14:33:53,267 - pipeline.preprocesser - INFO - C:\Users\LENOVO\PycharmProjects\NeuroOrion_Time_Series_Anomaly_Detection\data\preprocessed_data\test_labels.npy has been saved successfully!
+ 2025-01-06 14:33:53,320 - pipeline.trainer - INFO - Initialize the model...
+ 2025-01-06 14:33:53,320 - pipeline.trainer - INFO - Configure the training parameters...
+ 2025-01-06 14:33:55,517 - pipeline.trainer - INFO - Start training...
+ 2025-01-06 14:33:56,930 - pipeline.trainer - INFO - Epoch 1/20, Train Loss: 0.3034, Val Loss: 0.0795
+ 2025-01-06 14:33:58,164 - pipeline.trainer - INFO - Epoch 2/20, Train Loss: 0.0653, Val Loss: 0.0455
+ 2025-01-06 14:33:59,489 - pipeline.trainer - INFO - Epoch 3/20, Train Loss: 0.0439, Val Loss: 0.0350
+ 2025-01-06 14:34:00,754 - pipeline.trainer - INFO - Epoch 4/20, Train Loss: 0.0357, Val Loss: 0.0351
+ 2025-01-06 14:34:02,119 - pipeline.trainer - INFO - Epoch 5/20, Train Loss: 0.0337, Val Loss: 0.0251
+ 2025-01-06 14:34:03,447 - pipeline.trainer - INFO - Epoch 6/20, Train Loss: 0.0293, Val Loss: 0.0248
+ 2025-01-06 14:34:04,771 - pipeline.trainer - INFO - Epoch 7/20, Train Loss: 0.0268, Val Loss: 0.0235
+ 2025-01-06 14:34:06,231 - pipeline.trainer - INFO - Epoch 8/20, Train Loss: 0.0273, Val Loss: 0.0215
+ 2025-01-06 14:34:07,567 - pipeline.trainer - INFO - Epoch 9/20, Train Loss: 0.0244, Val Loss: 0.0182
+ 2025-01-06 14:34:08,960 - pipeline.trainer - INFO - Epoch 10/20, Train Loss: 0.0242, Val Loss: 0.0190
+ 2025-01-06 14:34:10,369 - pipeline.trainer - INFO - Epoch 11/20, Train Loss: 0.0234, Val Loss: 0.0340
+ 2025-01-06 14:34:11,813 - pipeline.trainer - INFO - Epoch 12/20, Train Loss: 0.0214, Val Loss: 0.0185
+ 2025-01-06 14:34:13,487 - pipeline.trainer - INFO - Epoch 13/20, Train Loss: 0.0204, Val Loss: 0.0165
+ 2025-01-06 14:34:15,658 - pipeline.trainer - INFO - Epoch 14/20, Train Loss: 0.0202, Val Loss: 0.0166
+ 2025-01-06 14:34:17,142 - pipeline.trainer - INFO - Epoch 15/20, Train Loss: 0.0194, Val Loss: 0.0156
+ 2025-01-06 14:34:18,893 - pipeline.trainer - INFO - Epoch 16/20, Train Loss: 0.0184, Val Loss: 0.0183
+ 2025-01-06 14:34:20,405 - pipeline.trainer - INFO - Epoch 17/20, Train Loss: 0.0181, Val Loss: 0.0166
+ 2025-01-06 14:34:21,905 - pipeline.trainer - INFO - Epoch 18/20, Train Loss: 0.0177, Val Loss: 0.0164
+ 2025-01-06 14:34:23,342 - pipeline.trainer - INFO - Epoch 19/20, Train Loss: 0.0173, Val Loss: 0.0163
+ 2025-01-06 14:34:24,784 - pipeline.trainer - INFO - Epoch 20/20, Train Loss: 0.0169, Val Loss: 0.0137
+ 2025-01-06 14:34:24,784 - pipeline.trainer - INFO - Training completed!
+ 2025-01-06 14:34:24,785 - pipeline.trainer - INFO - Initialize the model...
+ 2025-01-06 14:34:24,786 - pipeline.trainer - INFO - Configure the training parameters...
+ 2025-01-06 14:34:24,786 - pipeline.trainer - INFO - Start training...
+ 2025-01-06 14:34:26,589 - pipeline.trainer - INFO - Epoch 1/20, Train Loss: 1.0037, Val Loss: 1.1303
+ 2025-01-06 14:34:28,275 - pipeline.trainer - INFO - Epoch 2/20, Train Loss: 0.9997, Val Loss: 1.1426
+ 2025-01-06 14:34:30,034 - pipeline.trainer - INFO - Epoch 3/20, Train Loss: 0.9988, Val Loss: 1.1378
+ 2025-01-06 14:34:31,756 - pipeline.trainer - INFO - Epoch 4/20, Train Loss: 0.9897, Val Loss: 1.0285
+ 2025-01-06 14:34:33,542 - pipeline.trainer - INFO - Epoch 5/20, Train Loss: 0.8906, Val Loss: 0.9805
+ 2025-01-06 14:34:35,897 - pipeline.trainer - INFO - Epoch 6/20, Train Loss: 0.8219, Val Loss: 0.8182
+ 2025-01-06 14:34:37,835 - pipeline.trainer - INFO - Epoch 7/20, Train Loss: 0.7519, Val Loss: 0.7566
+ 2025-01-06 14:34:39,565 - pipeline.trainer - INFO - Epoch 8/20, Train Loss: 0.7407, Val Loss: 0.7998
+ 2025-01-06 14:34:41,248 - pipeline.trainer - INFO - Epoch 9/20, Train Loss: 0.7306, Val Loss: 0.7671
+ 2025-01-06 14:34:43,010 - pipeline.trainer - INFO - Epoch 10/20, Train Loss: 0.7294, Val Loss: 0.7305
+ 2025-01-06 14:34:44,642 - pipeline.trainer - INFO - Epoch 11/20, Train Loss: 0.7223, Val Loss: 0.7628
+ 2025-01-06 14:34:46,323 - pipeline.trainer - INFO - Epoch 12/20, Train Loss: 0.7174, Val Loss: 0.7732
+ 2025-01-06 14:34:48,097 - pipeline.trainer - INFO - Epoch 13/20, Train Loss: 0.7136, Val Loss: 0.7379
+ 2025-01-06 14:34:49,784 - pipeline.trainer - INFO - Epoch 14/20, Train Loss: 0.7142, Val Loss: 0.7372
+ 2025-01-06 14:34:51,476 - pipeline.trainer - INFO - Epoch 15/20, Train Loss: 0.7131, Val Loss: 0.7190
+ 2025-01-06 14:34:53,172 - pipeline.trainer - INFO - Epoch 16/20, Train Loss: 0.7203, Val Loss: 0.7440
+ 2025-01-06 14:34:54,822 - pipeline.trainer - INFO - Epoch 17/20, Train Loss: 0.7083, Val Loss: 0.7466
+ 2025-01-06 14:34:56,503 - pipeline.trainer - INFO - Epoch 18/20, Train Loss: 0.7139, Val Loss: 0.7061
+ 2025-01-06 14:34:58,186 - pipeline.trainer - INFO - Epoch 19/20, Train Loss: 0.7102, Val Loss: 0.7272
+ 2025-01-06 14:34:59,852 - pipeline.trainer - INFO - Epoch 20/20, Train Loss: 0.7139, Val Loss: 0.7414
+ 2025-01-06 14:34:59,852 - pipeline.trainer - INFO - Training completed!
+ 2025-01-06 14:34:59,871 - pipeline.trainer - INFO - Initialize the model...
+ 2025-01-06 14:34:59,871 - pipeline.trainer - INFO - Configure the training parameters...
+ 2025-01-06 14:34:59,872 - pipeline.trainer - INFO - Start training...
+ 2025-01-06 14:35:08,324 - pipeline.trainer - INFO - Epoch 1/5, Train Loss: 0.0648, Val Loss: 0.0018
+ 2025-01-06 14:35:15,897 - pipeline.trainer - INFO - Epoch 2/5, Train Loss: 0.0059, Val Loss: 0.0024
+ 2025-01-06 14:35:23,713 - pipeline.trainer - INFO - Epoch 3/5, Train Loss: 0.0032, Val Loss: 0.0012
+ 2025-01-06 14:35:31,344 - pipeline.trainer - INFO - Epoch 4/5, Train Loss: 0.0022, Val Loss: 0.0005
+ 2025-01-06 14:35:39,079 - pipeline.trainer - INFO - Epoch 5/5, Train Loss: 0.0016, Val Loss: 0.0002
+ 2025-01-06 14:35:39,079 - pipeline.trainer - INFO - Training completed!
+ 2025-01-06 19:00:28,816 - pipeline.preprocesser - INFO - Loading raw data...
+ 2025-01-06 19:00:28,853 - pipeline.preprocesser - INFO - Detecting anomalies...
+ 2025-01-06 19:00:28,853 - pipeline.preprocesser - INFO - Labeling anomalies...
+ 2025-01-06 19:00:28,876 - pipeline.preprocesser - INFO - Splitting the data into training, validation and testing set...
+ 2025-01-06 19:00:28,879 - pipeline.preprocesser - INFO - Splitting the data into features and target...
+ 2025-01-06 19:00:28,881 - pipeline.preprocesser - INFO - Scaling the data...
+ 2025-01-06 19:00:28,889 - pipeline.preprocesser - INFO - Creating sliding window with the length of 24 from the data...
+ 2025-01-06 19:00:28,896 - pipeline.preprocesser - INFO - Saving the preprocessed data...
+ 2025-01-06 19:00:28,898 - pipeline.preprocesser - INFO - C:\Users\LENOVO\PycharmProjects\NeuroOrion_Time_Series_Anomaly_Detection\data\preprocessed_data\train_features.npy has been saved successfully!
+ 2025-01-06 19:00:28,899 - pipeline.preprocesser - INFO - C:\Users\LENOVO\PycharmProjects\NeuroOrion_Time_Series_Anomaly_Detection\data\preprocessed_data\val_features.npy has been saved successfully!
+ 2025-01-06 19:00:28,900 - pipeline.preprocesser - INFO - C:\Users\LENOVO\PycharmProjects\NeuroOrion_Time_Series_Anomaly_Detection\data\preprocessed_data\test_features.npy has been saved successfully!
+ 2025-01-06 19:00:28,901 - pipeline.preprocesser - INFO - C:\Users\LENOVO\PycharmProjects\NeuroOrion_Time_Series_Anomaly_Detection\data\preprocessed_data\train_labels.npy has been saved successfully!
+ 2025-01-06 19:00:28,902 - pipeline.preprocesser - INFO - C:\Users\LENOVO\PycharmProjects\NeuroOrion_Time_Series_Anomaly_Detection\data\preprocessed_data\val_labels.npy has been saved successfully!
+ 2025-01-06 19:00:28,903 - pipeline.preprocesser - INFO - C:\Users\LENOVO\PycharmProjects\NeuroOrion_Time_Series_Anomaly_Detection\data\preprocessed_data\test_labels.npy has been saved successfully!
+ 2025-01-06 19:00:28,986 - pipeline.trainer - INFO - Initialize the model...
+ 2025-01-06 19:00:28,987 - pipeline.trainer - INFO - Configure the training parameters...
+ 2025-01-06 19:00:31,247 - pipeline.trainer - INFO - Start training...
+ 2025-01-06 19:00:32,247 - pipeline.trainer - INFO - Epoch 1/20, Train Loss: 0.2837, Val Loss: 0.0671
+ 2025-01-06 19:00:33,049 - pipeline.trainer - INFO - Epoch 2/20, Train Loss: 0.0577, Val Loss: 0.0383
+ 2025-01-06 19:00:34,549 - pipeline.trainer - INFO - Epoch 3/20, Train Loss: 0.0440, Val Loss: 0.0301
+ 2025-01-06 19:00:35,469 - pipeline.trainer - INFO - Epoch 4/20, Train Loss: 0.0352, Val Loss: 0.0342
+ 2025-01-06 19:00:36,504 - pipeline.trainer - INFO - Epoch 5/20, Train Loss: 0.0309, Val Loss: 0.0212
+ 2025-01-06 19:00:37,677 - pipeline.trainer - INFO - Epoch 6/20, Train Loss: 0.0281, Val Loss: 0.0234
+ 2025-01-06 19:00:39,663 - pipeline.trainer - INFO - Epoch 7/20, Train Loss: 0.0254, Val Loss: 0.0234
+ 2025-01-06 19:00:41,030 - pipeline.trainer - INFO - Epoch 8/20, Train Loss: 0.0242, Val Loss: 0.0203
+ 2025-01-06 19:00:42,704 - pipeline.trainer - INFO - Epoch 9/20, Train Loss: 0.0233, Val Loss: 0.0263
+ 2025-01-06 19:00:44,501 - pipeline.trainer - INFO - Epoch 10/20, Train Loss: 0.0226, Val Loss: 0.0194
+ 2025-01-06 19:00:45,647 - pipeline.trainer - INFO - Epoch 11/20, Train Loss: 0.0215, Val Loss: 0.0191
+ 2025-01-06 19:00:47,411 - pipeline.trainer - INFO - Epoch 12/20, Train Loss: 0.0223, Val Loss: 0.0179
+ 2025-01-06 19:00:48,595 - pipeline.trainer - INFO - Epoch 13/20, Train Loss: 0.0214, Val Loss: 0.0223
+ 2025-01-06 19:00:49,564 - pipeline.trainer - INFO - Epoch 14/20, Train Loss: 0.0200, Val Loss: 0.0178
+ 2025-01-06 19:00:50,591 - pipeline.trainer - INFO - Epoch 15/20, Train Loss: 0.0193, Val Loss: 0.0186
+ 2025-01-06 19:00:51,548 - pipeline.trainer - INFO - Epoch 16/20, Train Loss: 0.0187, Val Loss: 0.0172
+ 2025-01-06 19:00:52,543 - pipeline.trainer - INFO - Epoch 17/20, Train Loss: 0.0184, Val Loss: 0.0159
+ 2025-01-06 19:00:53,596 - pipeline.trainer - INFO - Epoch 18/20, Train Loss: 0.0178, Val Loss: 0.0173
+ 2025-01-06 19:00:54,580 - pipeline.trainer - INFO - Epoch 19/20, Train Loss: 0.0175, Val Loss: 0.0154
+ 2025-01-06 19:00:55,473 - pipeline.trainer - INFO - Epoch 20/20, Train Loss: 0.0169, Val Loss: 0.0169
+ 2025-01-06 19:00:55,473 - pipeline.trainer - INFO - Training completed!
+ 2025-01-06 19:00:55,475 - pipeline.trainer - INFO - Initialize the model...
+ 2025-01-06 19:00:55,476 - pipeline.trainer - INFO - Configure the training parameters...
+ 2025-01-06 19:00:55,477 - pipeline.trainer - INFO - Start training...
+ 2025-01-06 19:02:52,646 - pipeline.preprocesser - INFO - Loading raw data...
+ 2025-01-06 19:02:52,671 - pipeline.preprocesser - INFO - Detecting anomalies...
+ 2025-01-06 19:02:52,671 - pipeline.preprocesser - INFO - Labeling anomalies...
+ 2025-01-06 19:02:52,680 - pipeline.preprocesser - INFO - Splitting the data into training, validation and testing set...
+ 2025-01-06 19:02:52,681 - pipeline.preprocesser - INFO - Splitting the data into features and target...
+ 2025-01-06 19:02:52,682 - pipeline.preprocesser - INFO - Scaling the data...
+ 2025-01-06 19:02:52,686 - pipeline.preprocesser - INFO - Creating sliding window with the length of 24 from the data...
+ 2025-01-06 19:02:52,693 - pipeline.preprocesser - INFO - Saving the preprocessed data...
+ 2025-01-06 19:02:52,694 - pipeline.preprocesser - INFO - C:\Users\LENOVO\PycharmProjects\NeuroOrion_Time_Series_Anomaly_Detection\data\preprocessed_data\train_features.npy has been saved successfully!
+ 2025-01-06 19:02:52,695 - pipeline.preprocesser - INFO - C:\Users\LENOVO\PycharmProjects\NeuroOrion_Time_Series_Anomaly_Detection\data\preprocessed_data\val_features.npy has been saved successfully!
+ 2025-01-06 19:02:52,696 - pipeline.preprocesser - INFO - C:\Users\LENOVO\PycharmProjects\NeuroOrion_Time_Series_Anomaly_Detection\data\preprocessed_data\test_features.npy has been saved successfully!
+ 2025-01-06 19:02:52,697 - pipeline.preprocesser - INFO - C:\Users\LENOVO\PycharmProjects\NeuroOrion_Time_Series_Anomaly_Detection\data\preprocessed_data\train_labels.npy has been saved successfully!
+ 2025-01-06 19:02:52,697 - pipeline.preprocesser - INFO - C:\Users\LENOVO\PycharmProjects\NeuroOrion_Time_Series_Anomaly_Detection\data\preprocessed_data\val_labels.npy has been saved successfully!
+ 2025-01-06 19:02:52,698 - pipeline.preprocesser - INFO - C:\Users\LENOVO\PycharmProjects\NeuroOrion_Time_Series_Anomaly_Detection\data\preprocessed_data\test_labels.npy has been saved successfully!
+ 2025-01-06 19:02:52,782 - pipeline.trainer - INFO - Initialize the model...
+ 2025-01-06 19:02:52,784 - pipeline.trainer - INFO - Configure the training parameters...
+ 2025-01-06 19:02:54,413 - pipeline.trainer - INFO - Start training...
+ 2025-01-06 19:02:55,398 - pipeline.trainer - INFO - Epoch 1/20, Train Loss: 0.2684, Val Loss: 0.0641
+ 2025-01-06 19:02:56,284 - pipeline.trainer - INFO - Epoch 2/20, Train Loss: 0.0561, Val Loss: 0.0374
+ 2025-01-06 19:02:57,174 - pipeline.trainer - INFO - Epoch 3/20, Train Loss: 0.0403, Val Loss: 0.0275
+ 2025-01-06 19:02:58,048 - pipeline.trainer - INFO - Epoch 4/20, Train Loss: 0.0344, Val Loss: 0.0240
+ 2025-01-06 19:02:59,059 - pipeline.trainer - INFO - Epoch 5/20, Train Loss: 0.0273, Val Loss: 0.0220
+ 2025-01-06 19:03:00,078 - pipeline.trainer - INFO - Epoch 6/20, Train Loss: 0.0267, Val Loss: 0.0213
+ 2025-01-06 19:03:01,054 - pipeline.trainer - INFO - Epoch 7/20, Train Loss: 0.0242, Val Loss: 0.0194
+ 2025-01-06 19:03:02,053 - pipeline.trainer - INFO - Epoch 8/20, Train Loss: 0.0240, Val Loss: 0.0184
+ 2025-01-06 19:03:02,954 - pipeline.trainer - INFO - Epoch 9/20, Train Loss: 0.0222, Val Loss: 0.0183
+ 2025-01-06 19:03:03,976 - pipeline.trainer - INFO - Epoch 10/20, Train Loss: 0.0218, Val Loss: 0.0200
+ 2025-01-06 19:03:04,897 - pipeline.trainer - INFO - Epoch 11/20, Train Loss: 0.0211, Val Loss: 0.0181
+ 2025-01-06 19:03:05,807 - pipeline.trainer - INFO - Epoch 12/20, Train Loss: 0.0197, Val Loss: 0.0171
+ 2025-01-06 19:03:07,057 - pipeline.trainer - INFO - Epoch 13/20, Train Loss: 0.0197, Val Loss: 0.0185
+ 2025-01-06 19:03:08,468 - pipeline.trainer - INFO - Epoch 14/20, Train Loss: 0.0191, Val Loss: 0.0181
+ 2025-01-06 19:03:09,581 - pipeline.trainer - INFO - Epoch 15/20, Train Loss: 0.0188, Val Loss: 0.0186
+ 2025-01-06 19:03:10,485 - pipeline.trainer - INFO - Epoch 16/20, Train Loss: 0.0185, Val Loss: 0.0185
+ 2025-01-06 19:03:11,373 - pipeline.trainer - INFO - Epoch 17/20, Train Loss: 0.0181, Val Loss: 0.0164
+ 2025-01-06 19:03:12,261 - pipeline.trainer - INFO - Epoch 18/20, Train Loss: 0.0181, Val Loss: 0.0179
+ 2025-01-06 19:03:13,120 - pipeline.trainer - INFO - Epoch 19/20, Train Loss: 0.0170, Val Loss: 0.0153
+ 2025-01-06 19:03:14,041 - pipeline.trainer - INFO - Epoch 20/20, Train Loss: 0.0166, Val Loss: 0.0141
+ 2025-01-06 19:03:14,041 - pipeline.trainer - INFO - Training completed!
+ 2025-01-06 19:03:29,028 - pipeline.preprocesser - INFO - Loading raw data...
+ 2025-01-06 19:03:29,058 - pipeline.preprocesser - INFO - Detecting anomalies...
+ 2025-01-06 19:03:29,058 - pipeline.preprocesser - INFO - Labeling anomalies...
+ 2025-01-06 19:03:29,069 - pipeline.preprocesser - INFO - Splitting the data into training, validation and testing set...
+ 2025-01-06 19:03:29,070 - pipeline.preprocesser - INFO - Splitting the data into features and target...
+ 2025-01-06 19:03:29,072 - pipeline.preprocesser - INFO - Scaling the data...
+ 2025-01-06 19:03:29,077 - pipeline.preprocesser - INFO - Creating sliding window with the length of 24 from the data...
+ 2025-01-06 19:03:29,087 - pipeline.preprocesser - INFO - Saving the preprocessed data...
+ 2025-01-06 19:03:29,088 - pipeline.preprocesser - INFO - C:\Users\LENOVO\PycharmProjects\NeuroOrion_Time_Series_Anomaly_Detection\data\preprocessed_data\train_features.npy has been saved successfully!
+ 2025-01-06 19:03:29,089 - pipeline.preprocesser - INFO - C:\Users\LENOVO\PycharmProjects\NeuroOrion_Time_Series_Anomaly_Detection\data\preprocessed_data\val_features.npy has been saved successfully!
+ 2025-01-06 19:03:29,092 - pipeline.preprocesser - INFO - C:\Users\LENOVO\PycharmProjects\NeuroOrion_Time_Series_Anomaly_Detection\data\preprocessed_data\test_features.npy has been saved successfully!
+ 2025-01-06 19:03:29,093 - pipeline.preprocesser - INFO - C:\Users\LENOVO\PycharmProjects\NeuroOrion_Time_Series_Anomaly_Detection\data\preprocessed_data\train_labels.npy has been saved successfully!
+ 2025-01-06 19:03:29,093 - pipeline.preprocesser - INFO - C:\Users\LENOVO\PycharmProjects\NeuroOrion_Time_Series_Anomaly_Detection\data\preprocessed_data\val_labels.npy has been saved successfully!
+ 2025-01-06 19:03:29,094 - pipeline.preprocesser - INFO - C:\Users\LENOVO\PycharmProjects\NeuroOrion_Time_Series_Anomaly_Detection\data\preprocessed_data\test_labels.npy has been saved successfully!
+ 2025-01-06 19:03:29,156 - pipeline.trainer - INFO - Initialize the model...
+ 2025-01-06 19:03:29,157 - pipeline.trainer - INFO - Configure the training parameters...
+ 2025-01-06 19:03:30,542 - pipeline.trainer - INFO - Start training...
+ 2025-01-06 19:03:31,393 - pipeline.trainer - INFO - Epoch 1/20, Train Loss: 0.2989, Val Loss: 0.0746
+ 2025-01-06 19:03:32,272 - pipeline.trainer - INFO - Epoch 2/20, Train Loss: 0.0619, Val Loss: 0.0432
+ 2025-01-06 19:03:33,148 - pipeline.trainer - INFO - Epoch 3/20, Train Loss: 0.0434, Val Loss: 0.0294
+ 2025-01-06 19:03:34,056 - pipeline.trainer - INFO - Epoch 4/20, Train Loss: 0.0372, Val Loss: 0.0289
+ 2025-01-06 19:03:34,932 - pipeline.trainer - INFO - Epoch 5/20, Train Loss: 0.0334, Val Loss: 0.0241
+ 2025-01-06 19:03:35,838 - pipeline.trainer - INFO - Epoch 6/20, Train Loss: 0.0306, Val Loss: 0.0254
+ 2025-01-06 19:03:36,718 - pipeline.trainer - INFO - Epoch 7/20, Train Loss: 0.0296, Val Loss: 0.0218
+ 2025-01-06 19:03:37,602 - pipeline.trainer - INFO - Epoch 8/20, Train Loss: 0.0263, Val Loss: 0.0228
+ 2025-01-06 19:03:38,475 - pipeline.trainer - INFO - Epoch 9/20, Train Loss: 0.0253, Val Loss: 0.0266
+ 2025-01-06 19:03:39,344 - pipeline.trainer - INFO - Epoch 10/20, Train Loss: 0.0247, Val Loss: 0.0200
+ 2025-01-06 19:03:40,207 - pipeline.trainer - INFO - Epoch 11/20, Train Loss: 0.0234, Val Loss: 0.0206
+ 2025-01-06 19:03:41,117 - pipeline.trainer - INFO - Epoch 12/20, Train Loss: 0.0224, Val Loss: 0.0198
+ 2025-01-06 19:03:41,980 - pipeline.trainer - INFO - Epoch 13/20, Train Loss: 0.0220, Val Loss: 0.0188
+ 2025-01-06 19:03:42,848 - pipeline.trainer - INFO - Epoch 14/20, Train Loss: 0.0215, Val Loss: 0.0194
+ 2025-01-06 19:03:43,749 - pipeline.trainer - INFO - Epoch 15/20, Train Loss: 0.0210, Val Loss: 0.0217
+ 2025-01-06 19:03:44,659 - pipeline.trainer - INFO - Epoch 16/20, Train Loss: 0.0204, Val Loss: 0.0208
+ 2025-01-06 19:03:45,532 - pipeline.trainer - INFO - Epoch 17/20, Train Loss: 0.0207, Val Loss: 0.0208
+ 2025-01-06 19:03:46,410 - pipeline.trainer - INFO - Epoch 18/20, Train Loss: 0.0204, Val Loss: 0.0252
+ 2025-01-06 19:03:47,283 - pipeline.trainer - INFO - Epoch 19/20, Train Loss: 0.0204, Val Loss: 0.0193
+ 2025-01-06 19:03:48,166 - pipeline.trainer - INFO - Epoch 20/20, Train Loss: 0.0191, Val Loss: 0.0184
+ 2025-01-06 19:03:48,166 - pipeline.trainer - INFO - Training completed!
+ 2025-01-06 19:03:48,168 - pipeline.trainer - INFO - Initialize the model...
+ 2025-01-06 19:03:48,168 - pipeline.trainer - INFO - Configure the training parameters...
+ 2025-01-06 19:03:48,169 - pipeline.trainer - INFO - Start training...
+ 2025-01-06 19:03:49,381 - pipeline.trainer - INFO - Epoch 1/20, Train Loss: 1.0015, Val Loss: 1.0956
+ 2025-01-06 19:03:50,607 - pipeline.trainer - INFO - Epoch 2/20, Train Loss: 0.8309, Val Loss: 0.8594
+ 2025-01-06 19:03:52,089 - pipeline.trainer - INFO - Epoch 3/20, Train Loss: 0.7853, Val Loss: 0.8332
+ 2025-01-06 19:03:53,445 - pipeline.trainer - INFO - Epoch 4/20, Train Loss: 0.7632, Val Loss: 0.8482
+ 2025-01-06 19:03:54,595 - pipeline.trainer - INFO - Epoch 5/20, Train Loss: 0.7387, Val Loss: 0.7666
+ 2025-01-06 19:03:55,804 - pipeline.trainer - INFO - Epoch 6/20, Train Loss: 0.6963, Val Loss: 0.6869
+ 2025-01-06 19:03:56,969 - pipeline.trainer - INFO - Epoch 7/20, Train Loss: 0.6593, Val Loss: 0.6919
+ 2025-01-06 19:03:58,135 - pipeline.trainer - INFO - Epoch 8/20, Train Loss: 0.6531, Val Loss: 0.6691
+ 2025-01-06 19:03:59,274 - pipeline.trainer - INFO - Epoch 9/20, Train Loss: 0.6444, Val Loss: 0.6625
+ 2025-01-06 19:04:00,445 - pipeline.trainer - INFO - Epoch 10/20, Train Loss: 0.6473, Val Loss: 0.6644
+ 2025-01-06 19:04:01,577 - pipeline.trainer - INFO - Epoch 11/20, Train Loss: 0.6338, Val Loss: 0.6757
+ 2025-01-06 19:04:02,737 - pipeline.trainer - INFO - Epoch 12/20, Train Loss: 0.6356, Val Loss: 0.6671
+ 2025-01-06 19:04:03,890 - pipeline.trainer - INFO - Epoch 13/20, Train Loss: 0.6390, Val Loss: 0.6591
+ 2025-01-06 19:04:05,140 - pipeline.trainer - INFO - Epoch 14/20, Train Loss: 0.6335, Val Loss: 0.6530
+ 2025-01-06 19:04:06,665 - pipeline.trainer - INFO - Epoch 15/20, Train Loss: 0.6349, Val Loss: 0.6527
+ 2025-01-06 19:04:08,485 - pipeline.trainer - INFO - Epoch 16/20, Train Loss: 0.6383, Val Loss: 0.6734
+ 2025-01-06 19:04:09,881 - pipeline.trainer - INFO - Epoch 17/20, Train Loss: 0.6310, Val Loss: 0.6583
+ 2025-01-06 19:04:11,073 - pipeline.trainer - INFO - Epoch 18/20, Train Loss: 0.6340, Val Loss: 0.6496
+ 2025-01-06 19:04:12,325 - pipeline.trainer - INFO - Epoch 19/20, Train Loss: 0.6363, Val Loss: 0.6392
+ 2025-01-06 19:04:13,552 - pipeline.trainer - INFO - Epoch 20/20, Train Loss: 0.6296, Val Loss: 0.6535
+ 2025-01-06 19:04:13,552 - pipeline.trainer - INFO - Training completed!
+ 2025-01-06 19:04:13,561 - pipeline.trainer - INFO - Initialize the model...
+ 2025-01-06 19:04:13,562 - pipeline.trainer - INFO - Configure the training parameters...
+ 2025-01-06 19:04:13,562 - pipeline.trainer - INFO - Start training...
+ 2025-01-06 19:04:18,489 - pipeline.trainer - INFO - Epoch 1/5, Train Loss: 0.0816, Val Loss: 0.0026
+ 2025-01-06 19:04:23,306 - pipeline.trainer - INFO - Epoch 2/5, Train Loss: 0.0082, Val Loss: 0.0021
+ 2025-01-06 19:04:28,844 - pipeline.trainer - INFO - Epoch 3/5, Train Loss: 0.0046, Val Loss: 0.0012
+ 2025-01-06 19:04:33,885 - pipeline.trainer - INFO - Epoch 4/5, Train Loss: 0.0031, Val Loss: 0.0006
+ 2025-01-06 19:04:38,925 - pipeline.trainer - INFO - Epoch 5/5, Train Loss: 0.0024, Val Loss: 0.0007
+ 2025-01-06 19:04:38,925 - pipeline.trainer - INFO - Training completed!
+ 2025-01-06 19:04:38,939 - root - INFO - Model saved at C:\Users\LENOVO\PycharmProjects\NeuroOrion_Time_Series_Anomaly_Detection\models\lstm_model_small.pth
+ 2025-01-06 19:04:38,943 - root - INFO - Model saved at C:\Users\LENOVO\PycharmProjects\NeuroOrion_Time_Series_Anomaly_Detection\models\vae_model_small.pth
+ 2025-01-06 19:04:38,951 - root - INFO - Model saved at C:\Users\LENOVO\PycharmProjects\NeuroOrion_Time_Series_Anomaly_Detection\models\transformer_model_small.pth
src/pipeline/__init__.py ADDED
@@ -0,0 +1,7 @@
+ from .dataloader import NYCDataLoader
+ from .preprocesser import Preprocessor
+ from .model import (VanillaLSTM,
+                     Transformer,
+                     VAE)
+ from .trainer import Trainer
+ from .utils import save_model
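With these re-exports, callers import from the package root instead of individual modules — `src/app.py` does exactly this with `from pipeline import Transformer`. For example:

```python
from pipeline import NYCDataLoader, Transformer, Trainer
```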
src/pipeline/dataloader.py ADDED
@@ -0,0 +1,65 @@
+ # Class for loading data from the dataset
+ import os
+ import logging
+ import pandas as pd
+ import torch
+ import numpy as np
+ from torch.utils.data import TensorDataset, DataLoader
+
+ from path_config import (
+     RAW_DATA_PATH,
+     TRAIN_FEATURES_PATH,
+     TRAIN_LABELS_PATH,
+     VAL_FEATURES_PATH,
+     VAL_LABELS_PATH,
+     TEST_FEATURES_PATH,
+     TEST_LABELS_PATH,
+ )
+
+
+ class NYCDataLoader:
+
+     def __init__(self, batch_size):
+         self.batch_size = batch_size
+         self.train_features = None
+         self.train_labels = None
+         self.val_features = None
+         self.val_labels = None
+         self.test_features = None
+         self.test_labels = None
+         self.logger = logging.getLogger(__name__)
+
+     def create_tensor(self):
+         """
+         Load the preprocessed data and convert them to tensors
+         """
+         try:
+             self.train_features = torch.tensor(np.load(TRAIN_FEATURES_PATH)).float()
+             self.train_labels = torch.tensor(np.load(TRAIN_LABELS_PATH)).float()
+             self.val_features = torch.tensor(np.load(VAL_FEATURES_PATH)).float()
+             self.val_labels = torch.tensor(np.load(VAL_LABELS_PATH)).float()
+             self.test_features = torch.tensor(np.load(TEST_FEATURES_PATH)).float()
+             self.test_labels = torch.tensor(np.load(TEST_LABELS_PATH)).float()
+         except FileNotFoundError:
+             print("Preprocessed data not found. Please run the preprocessing script first.")
+
+     def load_data(self):
+         """
+         Create data loaders for training, validation, and testing
+         """
+         self.create_tensor()
+
+         train_loader = DataLoader(self.train_features, batch_size=self.batch_size, shuffle=True)
+         val_loader = DataLoader(self.val_features, batch_size=self.batch_size, shuffle=True)
+         # Keep the test set in order so predictions stay aligned with get_true_anomalies()
+         test_loader = DataLoader(self.test_features, batch_size=self.batch_size, shuffle=False)
+
+         print("Data loaded successfully.")
+
+         return train_loader, val_loader, test_loader
+
+     def get_true_anomalies(self):
+         """
+         Get the true anomalies from the test data
+         """
+         return self.test_labels
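Typical usage (a sketch: the preprocessed `.npy` files must already exist, i.e. the preprocessing step has been run):

```python
loader = NYCDataLoader(batch_size=32)
train_loader, val_loader, test_loader = loader.load_data()
labels = loader.get_true_anomalies()  # tensor of anomaly flags for the test split
```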
src/pipeline/model.py ADDED
@@ -0,0 +1,132 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
+ # Network architectures for the anomaly detection models
+ import torch
+ import torch.nn as nn
+ 
+ 
+ class VanillaLSTM(nn.Module):
+     def __init__(
+         self, input_dim=1, hidden_dim=64, output_dim=1, num_layers=2, dropout=0.2
+     ):
+         super(VanillaLSTM, self).__init__()
+         self.hidden_dim = hidden_dim
+         self.num_layers = num_layers
+ 
+         self.lstm = nn.LSTM(
+             input_size=input_dim,
+             hidden_size=hidden_dim,
+             num_layers=num_layers,
+             batch_first=True,
+             dropout=dropout,
+         )
+         self.fc = nn.Linear(in_features=hidden_dim, out_features=output_dim)
+ 
+     def forward(self, x):
+         # Zero initial hidden and cell states, created on the same device as the input
+         h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_dim, device=x.device)
+         c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_dim, device=x.device)
+ 
+         out, (hn, cn) = self.lstm(x, (h0, c0))
+         # Forecast from the hidden state of the final timestep only
+         out = self.fc(out[:, -1, :])
+ 
+         return out
+ 
+ 
+ class VAE(nn.Module):
+ 
+     def __init__(self, seq_len=48, n_features=1, hidden_dim=64, latent_dim=16, dropout=0.3):
+         super(VAE, self).__init__()
+         self.seq_len = seq_len
+         self.hidden_dim = hidden_dim
+ 
+         # Encoder
+         self.enc_lstm = nn.LSTM(
+             input_size=n_features,
+             hidden_size=hidden_dim,
+             batch_first=True
+         )
+         self.enc_dropout = nn.Dropout(p=dropout)
+         self.fc_mu = nn.Linear(hidden_dim, latent_dim)
+         self.fc_var = nn.Linear(hidden_dim, latent_dim)
+ 
+         # Decoder
+         self.fc_upsample = nn.Linear(latent_dim, seq_len * hidden_dim)
+         self.dec_dropout = nn.Dropout(p=dropout)
+         self.dec_lstm = nn.LSTM(
+             input_size=hidden_dim,
+             hidden_size=hidden_dim,
+             batch_first=True
+         )
+         self.fc_out = nn.Linear(hidden_dim, n_features)
+ 
+     def reparameterize(self, mu, log_var):
+         # Sample z = mu + sigma * eps with eps ~ N(0, I)
+         std = torch.exp(0.5 * log_var)
+         eps = torch.randn_like(std)
+         return mu + eps * std
+ 
+     def forward(self, x):
+         # Encode
+         _, (h_enc, c_enc) = self.enc_lstm(x)
+         h_enc = h_enc.squeeze(0)  # shape: (batch_size, hidden_dim)
+         h_enc = self.enc_dropout(h_enc)
+         mu, log_var = self.fc_mu(h_enc), self.fc_var(h_enc)
+ 
+         # Reparameterize at the latent space
+         z = self.reparameterize(mu, log_var)
+ 
+         # Decode
+         z = self.fc_upsample(z)
+         z = z.view(-1, self.seq_len, self.hidden_dim)
+         decoded, _ = self.dec_lstm(z)
+         dec_out = self.dec_dropout(decoded)
+         out = self.fc_out(dec_out)
+ 
+         return out, mu, log_var
+ 
+ 
+ class Transformer(nn.Module):
+ 
+     def __init__(self, input_dim=1, model_dim=64, num_layers=2, num_heads=4, dropout=0.2):
+         super(Transformer, self).__init__()
+         self.model_dim = model_dim
+         self.num_layers = num_layers
+ 
+         self.embedding = nn.Linear(input_dim, model_dim)
+ 
+         encoder_layer = nn.TransformerEncoderLayer(
+             d_model=model_dim,
+             nhead=num_heads,
+             dropout=dropout,
+             dim_feedforward=2 * model_dim,  # 128
+             batch_first=True
+         )
+         encoder_norm = nn.LayerNorm(model_dim)
+ 
+         self.transformer_encoder = nn.TransformerEncoder(
+             encoder_layer,
+             num_layers=num_layers,
+             norm=encoder_norm
+         )
+ 
+         decoder_layer = nn.TransformerDecoderLayer(
+             d_model=model_dim,
+             nhead=num_heads,
+             dropout=dropout,
+             dim_feedforward=2 * model_dim,  # 128
+             batch_first=True
+         )
+         decoder_norm = nn.LayerNorm(model_dim)
+ 
+         self.transformer_decoder = nn.TransformerDecoder(
+             decoder_layer,
+             num_layers=num_layers,
+             norm=decoder_norm
+         )
+         self.output = nn.Linear(model_dim, input_dim)
+ 
+     def forward(self, x):
+         # Reconstruct the input: the embedded sequence serves as both the
+         # decoder target and, via the encoder, its memory
+         embed_x = self.embedding(x)
+         enc_out = self.transformer_encoder(embed_x)
+         dec_out = self.transformer_decoder(embed_x, enc_out)
+         out = self.output(dec_out)
+         return out
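As a quick sanity check on the three architectures, a hedged sketch that pushes a dummy batch of 32 windows of length 24 with one feature through each model (the import path is an assumption):

import torch
from src.pipeline import VanillaLSTM, VAE, Transformer

x = torch.randn(32, 24, 1)  # (batch, seq_len, n_features)

lstm = VanillaLSTM()
print(lstm(x[:, :-1, :]).shape)   # torch.Size([32, 1]): next-step forecast

vae = VAE(seq_len=24)
recon, mu, log_var = vae(x)
print(recon.shape, mu.shape)      # torch.Size([32, 24, 1]) torch.Size([32, 16])

transformer = Transformer()
print(transformer(x).shape)       # torch.Size([32, 24, 1]): reconstruction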
src/pipeline/preprocesser.py ADDED
@@ -0,0 +1,223 @@
+ # Class for preprocessing the data before the training phase
+ import os
+ import logging
+ 
+ import numpy as np
+ import pandas as pd
+ from sklearn.preprocessing import StandardScaler
+ from adtk.data import validate_series
+ 
+ from path_config import DATA_DIR
+ 
+ 
+ class Preprocessor:
+ 
+     def __init__(self):
+         self.scaler = StandardScaler()
+         self.raw_data = None
+         self.anomalies_events = None
+         self.logger = logging.getLogger(__name__)
+         self.window_size = None
+ 
+     def preprocess_data(self, file_path, val_split="2014-10-01", test_split="2014-10-16", window_size=48):
+         """
+         Preprocess the raw data
+ 
+         :param file_path: Path to the CSV file
+         :param val_split: Date at which the validation set starts
+         :param test_split: Date at which the test set starts
+         :param window_size: The size of the sliding window, default is 48
+         """
+         # Load the raw data
+         self.logger.info("Loading raw data...")
+         self.load_raw_data(file_path)
+ 
+         # Load the known anomalous event windows
+         self.logger.info("Loading anomaly events...")
+         self.load_anomalies_events()
+ 
+         # Label the anomalies in the raw data
+         self.logger.info("Labeling anomalies...")
+         self._label_anomalies()
+ 
+         # Split the data chronologically
+         self.logger.info("Splitting the data into training, validation and testing sets...")
+         train_data, val_data, test_data = self._chronological_split(val_split=val_split, test_split=test_split)
+ 
+         # Split the data into features and target
+         self.logger.info("Splitting the data into features and target...")
+         X_train, y_train = self._split_features_target(train_data)
+         X_val, y_val = self._split_features_target(val_data)
+         X_test, y_test = self._split_features_target(test_data)
+ 
+         # Scale the data
+         self.logger.info("Scaling the data...")
+         train_scaled, val_scaled, test_scaled = self._scale_data(X_train, X_val, X_test)
+ 
+         # Create a sliding window of data
+         self.logger.info(f"Creating sliding windows of length {window_size} from the data...")
+         train_sequences = self._create_sliding_window(train_scaled, window_size=window_size)
+         val_sequences = self._create_sliding_window(val_scaled, window_size=window_size)
+         test_sequences = self._create_sliding_window(test_scaled, window_size=window_size)
+ 
+         self.window_size = window_size
+ 
+         # Save the preprocessed data. Note: feature arrays hold one entry per
+         # window while label arrays keep one entry per timestamp, so downstream
+         # evaluation should align them (e.g. take each window's last label)
+         self.logger.info("Saving the preprocessed data...")
+         self.save_preprocessed_data(train_sequences, "train_features.npy")
+         self.save_preprocessed_data(val_sequences, "val_features.npy")
+         self.save_preprocessed_data(test_sequences, "test_features.npy")
+         self.save_preprocessed_data(y_train.values, "train_labels.npy")
+         self.save_preprocessed_data(y_val.values, "val_labels.npy")
+         self.save_preprocessed_data(y_test.values, "test_labels.npy")
+ 
+         print("Preprocessing completed!")
+ 
+     def load_raw_data(self, file_path):
+         """
+         Load raw data from a CSV file
+ 
+         :param file_path: Path to the CSV file
+         """
+         try:
+             df = pd.read_csv(
+                 file_path,
+                 usecols=["timestamp", "value"],
+                 index_col="timestamp",
+                 parse_dates=True,
+             )
+             df.sort_index(inplace=True)
+ 
+             # Rename the columns
+             df.rename(columns={"value": "Traffic"}, inplace=True)
+             df.index.rename("Timestamp", inplace=True)
+ 
+             # Validate the time series
+             self.raw_data = validate_series(df)
+ 
+         except FileNotFoundError:
+             self.logger.error(f"File path does not exist: {file_path}")
+             raise
+ 
+     def load_anomalies_events(self):
+         """
+         Load the known anomalous event windows (hard-coded city events)
+         """
+         if self.raw_data is None:
+             self.logger.warning("Raw data is not loaded")
+             return
+ 
+         events = [
+             ('2014-07-04 00:00:00', '2014-07-06 23:59:59'),  # Independence Day celebration
+             ('2014-09-01 00:00:00', '2014-09-01 23:59:59'),  # Labor Day
+             ('2014-11-02 00:00:00', '2014-11-02 11:59:59'),  # NYC Marathon 2014
+             ('2014-11-27 00:00:00', '2014-11-27 23:59:59'),  # Thanksgiving Day
+             ('2014-12-25 00:00:00', '2014-12-26 23:59:59'),  # Christmas holiday
+             ('2015-01-01 00:00:00', '2015-01-01 23:59:59'),  # New Year
+             ('2015-01-26 12:00:00', '2015-01-28 11:59:59')   # Snowstorm
+         ]
+ 
+         # Store the events
+         self.anomalies_events = events
+ 
+     def _label_anomalies(self):
+         """
+         Label the anomalies in the raw data
+         """
+         if self.raw_data is None:
+             self.logger.warning("Raw data is not loaded")
+             return
+ 
+         if self.anomalies_events is None:
+             self.logger.warning("Anomaly events are not loaded")
+             return
+ 
+         # Label anomalous timestamps as 1, normal ones as 0
+         self.raw_data["Anomaly"] = 0
+         for start, end in self.anomalies_events:
+             self.raw_data.loc[start:end, "Anomaly"] = 1
+ 
+     def _chronological_split(self, val_split="2014-10-01", test_split="2014-10-16"):
+         """
+         Split the data chronologically into train, validation, and test sets
+ 
+         :param val_split: Validation split date
+         :param test_split: Test split date
+         """
+         if self.raw_data is None:
+             self.logger.warning("Raw data is not loaded")
+             return
+ 
+         # Split the data
+         train_data = self.raw_data.loc[self.raw_data.index < val_split]
+         val_data = self.raw_data.loc[
+             (self.raw_data.index >= val_split) & (self.raw_data.index < test_split)
+         ]
+         test_data = self.raw_data.loc[self.raw_data.index >= test_split]
+ 
+         return train_data, val_data, test_data
+ 
+     def _split_features_target(self, data, target_col="Anomaly"):
+         """
+         Split the data into features and target
+ 
+         :param data: DataFrame containing the data
+         :param target_col: Column to predict
+         """
+         X = data.drop(columns=[target_col])
+         y = data[target_col]
+ 
+         return X, y
+ 
+     def _scale_data(self, train_data, val_data, test_data):
+         """
+         Scale the data using StandardScaler
+ 
+         :param train_data: Training data
+         :param val_data: Validation data
+         :param test_data: Test data
+         """
+         # Fit on the training split only to avoid leaking future statistics
+         train_scaled = self.scaler.fit_transform(train_data)
+         val_scaled = self.scaler.transform(val_data)
+         test_scaled = self.scaler.transform(test_data)
+ 
+         return train_scaled, val_scaled, test_scaled
+ 
+     def _create_sliding_window(self, data, window_size=48, step_size=1):
+         """
+         Create a sliding window of data
+ 
+         :param data: Scaled data
+         :param window_size: Size of the window
+         :param step_size: Step size for the window
+         """
+         sequences = []
+ 
+         for i in range(0, len(data) - window_size + 1, step_size):
+             sequences.append(data[i : i + window_size])
+ 
+         return np.array(sequences)
+ 
+     def save_preprocessed_data(self, data, file_path):
+         """
+         Save preprocessed data to a .npy file
+ 
+         :param data: Preprocessed data
+         :param file_path: Name of the .npy file to save
+         """
+         dir_path = os.path.join(DATA_DIR, "preprocessed_data")
+         os.makedirs(dir_path, exist_ok=True)
+ 
+         file_path = os.path.join(dir_path, file_path)
+ 
+         np.save(file_path, data)
+         self.logger.info(f"{file_path} has been saved successfully!")
+ 
+     def get_seq_length(self):
+         """
+         Get the length of the sliding window sequences
+         """
+         return self.window_size
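As a worked example of the windowing arithmetic in _create_sliding_window (a standalone sketch, not pipeline code): 480 scaled readings with a window of 48 and step 1 yield 480 - 48 + 1 = 433 overlapping sequences, each of shape (48, 1):

import numpy as np

data = np.arange(480).reshape(-1, 1)  # 480 readings, one feature
window_size, step_size = 48, 1

sequences = np.array([
    data[i : i + window_size]
    for i in range(0, len(data) - window_size + 1, step_size)
])
print(sequences.shape)  # (433, 48, 1)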
src/pipeline/trainer.py ADDED
@@ -0,0 +1,176 @@
+ # Class for model training and evaluation
+ import copy
+ import logging
+ 
+ import torch
+ import torch.nn as nn
+ from torch.optim import Adam
+ 
+ 
+ class Trainer:
+ 
+     def __init__(self):
+         self.logger = logging.getLogger(__name__)
+         self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+         self.batch_size = None
+         self.model = None
+         self.model_type = None
+         self.optimizer = None
+         self.criterion = None
+         self.train_loader = None
+         self.val_loader = None
+         self.test_loader = None
+         self.n_epochs = None
+         self.train_history = {'train_loss': [], 'val_loss': []}
+         self.best_model = None
+         self.best_val_loss = float('inf')
+ 
+     def init_model(self, model, model_type):
+         """
+         Initialize the model and record its type
+ 
+         :param model: The model architecture
+         :param model_type: The type of the model ("lstm", "vae" or "transformer")
+         """
+         self.logger.info("Initializing the model...")
+ 
+         self.model = model.to(self.device)
+         if model_type not in ["lstm", "vae", "transformer"]:
+             raise ValueError("Model type not supported")
+         self.model_type = model_type
+ 
+     def config_train(self, batch_size=32, n_epochs=20, lr=0.001):
+         """
+         Configure the training parameters
+ 
+         :param batch_size: The batch size, default is 32
+         :param n_epochs: The number of epochs, default is 20
+         :param lr: The learning rate, default is 0.001
+         """
+         self.logger.info("Configuring the training parameters...")
+ 
+         self.batch_size = batch_size
+         self.n_epochs = n_epochs
+ 
+         self.optimizer = Adam(self.model.parameters(), lr=lr)
+         self.criterion = nn.MSELoss()
+ 
+     def train(self, train_loader, val_loader):
+         """
+         Train the model
+ 
+         :param train_loader: The training data loader
+         :param val_loader: The validation data loader
+         """
+         print("Training the model...")
+         self.logger.info("Start training...")
+ 
+         self.train_loader = train_loader
+         self.val_loader = val_loader
+ 
+         # Reset the trackers so a reused Trainer does not mix models' results
+         self.train_history = {'train_loss': [], 'val_loss': []}
+         self.best_val_loss = float('inf')
+         self.best_model = None
+ 
+         for epoch in range(self.n_epochs):
+             train_loss = self._train_epoch()
+             val_loss = self._val_epoch()
+ 
+             self.train_history['train_loss'].append(train_loss)
+             self.train_history['val_loss'].append(val_loss)
+ 
+             self.logger.info(f"Epoch {epoch + 1}/{self.n_epochs}, Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}")
+ 
+         self.logger.info("Training completed!")
+         print("Training completed!")
+ 
+         return self.best_model, self.train_history
+ 
+     def _train_epoch(self):
+         """
+         Train the model for one epoch
+         """
+         self.model.train()
+         train_loss = 0
+ 
+         for seq in self.train_loader:
+ 
+             self.optimizer.zero_grad()
+ 
+             if self.model_type == "lstm":
+                 X_train = seq[:, :-1, :]  # All timesteps except the last one
+                 y_train = seq[:, -1, :]   # Final timestep to forecast
+ 
+                 X_train = X_train.to(self.device)
+                 y_train = y_train.to(self.device)
+ 
+                 output = self.model(X_train)
+                 loss = self.criterion(output, y_train)
+ 
+             elif self.model_type == "vae":
+                 X = seq.to(self.device)
+                 recon_X, mu, logvar = self.model(X)
+                 # ELBO-style loss: reconstruction error plus a weighted KL divergence term
+                 recon_loss = self.criterion(recon_X, X)
+                 kl_div = -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp()) / X.size(0)
+                 loss = recon_loss + 0.2 * kl_div
+ 
+             elif self.model_type == "transformer":
+                 X = seq.to(self.device)
+                 recon_X = self.model(X)
+                 loss = self.criterion(recon_X, X)
+ 
+             else:
+                 raise ValueError("Model type not supported")
+ 
+             loss.backward()
+             self.optimizer.step()
+ 
+             train_loss += loss.item()
+ 
+         return train_loss / len(self.train_loader)
+ 
+     def _val_epoch(self):
+         """
+         Validate the model for one epoch
+         """
+         self.model.eval()
+         val_loss = 0
+ 
+         with torch.no_grad():
+             for seq in self.val_loader:
+ 
+                 if self.model_type == "lstm":
+                     X_val = seq[:, :-1, :]
+                     y_val = seq[:, -1, :]
+ 
+                     X_val = X_val.to(self.device)
+                     y_val = y_val.to(self.device)
+ 
+                     output = self.model(X_val)
+                     loss = self.criterion(output, y_val)
+ 
+                 elif self.model_type == "vae":
+                     X = seq.to(self.device)
+                     recon_X, mu, logvar = self.model(X)
+                     recon_loss = self.criterion(recon_X, X)
+                     kl_div = -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp()) / X.size(0)
+                     loss = recon_loss + 0.2 * kl_div
+ 
+                 elif self.model_type == "transformer":
+                     X_val = seq.to(self.device)
+                     recon_X = self.model(X_val)
+                     loss = self.criterion(recon_X, X_val)
+ 
+                 else:
+                     raise ValueError("Model type not supported")
+ 
+                 val_loss += loss.item()
+ 
+         val_loss /= len(self.val_loader)
+ 
+         # Keep a copy of the weights whenever the validation loss improves
+         if val_loss < self.best_val_loss:
+             self.best_model = copy.deepcopy(self.model)
+             self.best_val_loss = val_loss
+ 
+         return val_loss
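The Trainer only fits the models; scoring comes later. As a hedged sketch of how a trained reconstruction model (VAE or Transformer) could flag anomalies by per-window reconstruction error, with the 95th-percentile threshold being an illustrative assumption rather than part of this commit:

import torch

def reconstruction_errors(model, data_loader, device):
    """Mean squared reconstruction error per window."""
    model.eval()
    errors = []
    with torch.no_grad():
        for seq in data_loader:
            seq = seq.to(device)
            recon = model(seq)
            if isinstance(recon, tuple):  # the VAE returns (recon, mu, log_var)
                recon = recon[0]
            errors.append(((recon - seq) ** 2).mean(dim=(1, 2)))
    return torch.cat(errors)

# errors = reconstruction_errors(best_model, test_loader, trainer.device)
# flags = errors > torch.quantile(errors, 0.95)  # flag the top 5% as anomalies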
src/pipeline/utils.py ADDED
@@ -0,0 +1,16 @@
+ import os
+ import logging
+ 
+ import torch
+ 
+ from path_config import MODEL_DIR
+ 
+ 
+ def save_model(model, model_name):
+     """
+     Save the trained model's state dict
+ 
+     :param model: The trained model
+     :param model_name: File name for the saved weights
+     """
+     os.makedirs(MODEL_DIR, exist_ok=True)
+ 
+     model_path = os.path.join(MODEL_DIR, model_name)
+     torch.save(model.state_dict(), model_path)
+     logging.info(f"Model saved at {model_path}")
+     print("Saved successfully!")
src/train.py ADDED
@@ -0,0 +1,44 @@
+ # Imports assume the script is run with src/ on the Python path
+ from config.config import setup_logging
+ from pipeline import Preprocessor, NYCDataLoader, Trainer, VanillaLSTM, Transformer, VAE, save_model
+ from path_config import RAW_DATA_PATH
+ 
+ 
+ def train():
+ 
+     seq_length = 24
+ 
+     setup_logging()
+ 
+     # Preprocess the data
+     preprocessor = Preprocessor()
+     preprocessor.preprocess_data(file_path=RAW_DATA_PATH, window_size=seq_length)
+ 
+     # Load the preprocessed data
+     data_loader = NYCDataLoader(batch_size=32)
+     train_loader, val_loader, test_loader = data_loader.load_data()
+ 
+     # Initialize the Trainer
+     trainer = Trainer()
+ 
+     # Train the Vanilla LSTM model
+     trainer.init_model(model=VanillaLSTM(), model_type="lstm")
+     trainer.config_train(batch_size=32, n_epochs=20, lr=0.001)
+     lstm_model, lstm_history = trainer.train(train_loader=train_loader, val_loader=val_loader)
+ 
+     # Train the VAE model
+     trainer.init_model(model=VAE(seq_len=seq_length), model_type="vae")
+     trainer.config_train(batch_size=32, n_epochs=20, lr=0.001)
+     vae_model, vae_history = trainer.train(train_loader=train_loader, val_loader=val_loader)
+ 
+     # Train the Transformer model
+     trainer.init_model(model=Transformer(), model_type="transformer")
+     trainer.config_train(batch_size=32, n_epochs=5, lr=0.001)
+     transformer_model, transformer_history = trainer.train(train_loader=train_loader, val_loader=val_loader)
+ 
+     # Save the best models
+     save_model(lstm_model, "lstm_model_small.pth")
+     save_model(vae_model, "vae_model_small.pth")
+     save_model(transformer_model, "transformer_model_small.pth")
+ 
+ 
+ if __name__ == '__main__':
+     train()
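Assuming config.config, path_config and pipeline all live under src/, running "python src/train.py" from the repository root puts src/ on sys.path, so the imports above resolve and the full preprocess-train-save pipeline runs end to end.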