tgd1115 committed
Commit 8474315 · verified · 1 Parent(s): 8b1f062

Upload 12 files

Browse files
.streamlit/config.toml ADDED
@@ -0,0 +1,6 @@
+ [theme]
+ primaryColor = "#FFCC00"              # Taxi yellow
+ backgroundColor = "#F0F0F0"           # Light gray resembling city streets
+ secondaryBackgroundColor = "#FFFFFF"  # White for clean sidebar contrast
+ textColor = "#333333"                 # Dark gray for readability
+ font = "sans serif"                   # Modern, clean font style
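Streamlit reads `.streamlit/config.toml` automatically when the app is launched from the repository root, so the theme requires no code changes. A quick sanity check (a minimal sketch using Streamlit's public `st.get_option` accessor; not part of this commit):

```python
import streamlit as st

# Prints "#FFCC00" when the theme file above is being picked up.
st.write("Primary color:", st.get_option("theme.primaryColor"))
```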
src/app.py CHANGED
@@ -1,17 +1,36 @@
  import streamlit as st
  import pandas as pd
- import numpy as np
  import plotly.express as px
  import plotly.graph_objs as go
  from sklearn.preprocessing import StandardScaler
- from pyod.models.iforest import IForest
- from datetime import datetime, timedelta


  class NYCTaxiAnomalyDetector:
      def __init__(self, data):
          self.data = data.copy()
          self.scaler = StandardScaler()

      def filter_by_date_range(self, start_date, end_date):
          """
@@ -41,15 +60,16 @@ class NYCTaxiAnomalyDetector:
          :return: Scaled data and original index
          """
          # Ensure the column is numeric
-         data[column] = pd.to_numeric(data[column], errors="coerce")

          # Remove NaN values
          clean_data = data[column].dropna()

          # Scale the data
          scaled_data = self.scaler.fit_transform(clean_data.values.reshape(-1, 1))

-         return scaled_data, clean_data.index

      def detect_anomalies(self, data, column, contamination=0.05):
          """
@@ -60,77 +80,176 @@ class NYCTaxiAnomalyDetector:
          :param contamination: Expected proportion of outliers
          :return: DataFrame with anomaly detection results
          """
          # Preprocess data
-         scaled_data, original_index = self.preprocess_data(data, column)

-         # Apply Isolation Forest
-         clf = IForest(contamination=contamination, random_state=42)
-         y_pred = clf.fit_predict(scaled_data)

          # Create results DataFrame
          anomaly_results = pd.DataFrame(
              {
-                 "date": original_index,
-                 "value": data.loc[original_index, column],
-                 "is_anomaly": y_pred == 1,
              }
          )

          return anomaly_results

-
  class AIContextGenerator:
      def generate_context(self, anomaly_date):
          """
-         Generate potential context for the anomaly

          :param anomaly_date: Date of the anomaly
-         :return: List of contextual insights
          """
-         # Mock contextual insights - replace with actual data sources
-         contexts = [
-             {
-                 "type": "Weather",
-                 "description": f"Weather conditions on {anomaly_date.date()}",
-                 "severity": "High",
-             },
-             {
-                 "type": "Event",
-                 "description": f"City events around {anomaly_date.date()}",
-                 "severity": "Medium",
-             },
-             {
-                 "type": "Economic",
-                 "description": f"Economic factors on {anomaly_date.date()}",
-                 "severity": "Low",
-             },
-         ]
-         return contexts


- def load_nyc_taxi_data():
      """
-     Load and preprocess NYC Taxi dataset

-     :return: DataFrame with synthetic taxi traffic data
      """
-     # Synthetic data generation
-     dates = pd.date_range(start="2023-01-01", end="2023-12-31", freq="D")
-     base_traffic = np.random.normal(5000, 500, len(dates))

-     # Introduce some anomalies
-     base_traffic[50] = 10000  # Extreme spike
-     base_traffic[200] = 500  # Extreme drop
-     base_traffic[300] = 12000  # Another spike

-     df = pd.DataFrame({"date": dates, "daily_traffic": base_traffic})

      return df


  def main():
      st.set_page_config(
-         page_title="NYC Taxi Traffic Anomaly Detection", page_icon="🚕", layout="wide"
      )

      st.title("🚕 NYC Taxi Traffic Anomaly Detection")
@@ -180,8 +299,16 @@ def main():
          filtered_data, "daily_traffic", contamination=anomaly_threshold
      )

      # Visualization
-     st.header("Daily Taxi Traffic Trend")
      fig = px.line(
          filtered_data,
          x="date",
@@ -191,7 +318,6 @@ def main():
      )

      # Highlight Anomalies
-     anomaly_points = filtered_data[anomalies["is_anomaly"]]
      fig.add_trace(
          go.Scatter(
              x=anomaly_points["date"],
@@ -205,29 +331,73 @@ def main():
      st.plotly_chart(fig, use_container_width=True)

      # Anomaly Details
-     st.header("Anomaly Insights")

-     if not anomaly_points.empty:
          context_generator = AIContextGenerator()

-         for _, anomaly in anomaly_points.iterrows():
-             st.subheader(f"Anomaly on {anomaly['date'].date()}")

              col1, col2 = st.columns(2)

              with col1:
-                 st.metric("Taxi Rides", f"{anomaly['daily_traffic']:.0f}")

              with col2:
                  contexts = context_generator.generate_context(anomaly["date"])
-                 st.write("### Potential Context")
-                 for context in contexts:
-                     st.markdown(
-                         f"""
-                         - **{context['type']}**: {context['description']}
-                         (Severity: {context['severity']})
-                         """
-                     )
      else:
          st.info("No significant anomalies detected with current settings.")

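The deletions above retire the pyod `IForest` detector, whose `fit_predict` returns 1 for outliers (hence the removed `y_pred == 1` check), along with the synthetic data generator. For reference, a roughly equivalent stand-in with scikit-learn's `IsolationForest` — a sketch, not part of this commit; note that scikit-learn flags outliers with -1 rather than 1 — would look like:

```python
import numpy as np
from sklearn.ensemble import IsolationForest

rng = np.random.default_rng(42)
values = rng.normal(5000, 500, size=365)  # synthetic daily traffic, as in the removed loader
values[50] = 10000                        # planted spike

clf = IsolationForest(contamination=0.05, random_state=42)
labels = clf.fit_predict(values.reshape(-1, 1))
is_anomaly = labels == -1  # scikit-learn marks outliers with -1; pyod used 1
print(int(is_anomaly.sum()), "points flagged")
```

The updated `src/app.py` replaces this with the Transformer-based detector shown below.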
  import streamlit as st
  import pandas as pd
  import plotly.express as px
  import plotly.graph_objs as go
+ import numpy as np
  from sklearn.preprocessing import StandardScaler
+ from dataclasses import dataclass
+ from datetime import datetime
+ import torch
+ import torch.nn as nn
+ import os
+ from torch.utils.data import DataLoader, TensorDataset
+ from path_config import MODEL_DIR
+ from pipeline import Transformer


+ @dataclass()
  class NYCTaxiAnomalyDetector:
      def __init__(self, data):
          self.data = data.copy()
          self.scaler = StandardScaler()
+         self.model = None
+         self.TRANSFORMER_S_MODEL_PATH = os.path.join(
+             MODEL_DIR, "transformer_model_small.pth"
+         )
+
+     def create_sequences(self, data, seq_length=24):
+         """Create sequences for the transformer model"""
+         sequences = []
+         values = data.reshape(-1, 1)
+         for i in range(len(values) - seq_length + 1):
+             sequences.append(values[i : i + seq_length])
+         return np.array(sequences)

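`create_sequences` turns a 1-D series into overlapping windows of length 24, so `n` input points yield `n - 24 + 1` sequences of shape `(24, 1)`. A standalone shape check (a sketch, assuming nothing beyond NumPy):

```python
import numpy as np

values = np.arange(30, dtype=float).reshape(-1, 1)  # a scaled series of 30 points
seq_length = 24
windows = np.array(
    [values[i : i + seq_length] for i in range(len(values) - seq_length + 1)]
)
print(windows.shape)  # (7, 24, 1): 30 - 24 + 1 = 7 overlapping sequences
```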
      def filter_by_date_range(self, start_date, end_date):
          """
@@ -41,15 +60,16 @@ class NYCTaxiAnomalyDetector:
          :return: Scaled data and original index
          """
          # Ensure the column is numeric
+         data.loc[:, column] = pd.to_numeric(data[column], errors="coerce")

          # Remove NaN values
          clean_data = data[column].dropna()

          # Scale the data
          scaled_data = self.scaler.fit_transform(clean_data.values.reshape(-1, 1))
+         sequences = self.create_sequences(scaled_data)

+         return sequences, clean_data.index[23:]  # align each window with its last timestamp (seq_length - 1 = 23)

      def detect_anomalies(self, data, column, contamination=0.05):
          """
@@ -60,77 +80,176 @@ class NYCTaxiAnomalyDetector:
          :param contamination: Expected proportion of outliers
          :return: DataFrame with anomaly detection results
          """
+         if self.model is None:
+             self.model = Transformer()
+             self.model.load_state_dict(
+                 torch.load(self.TRANSFORMER_S_MODEL_PATH, weights_only=True)
+             )
+             self.model.eval()
+
          # Preprocess data
+         sequences, original_index = self.preprocess_data(data, column)
+
+         # Create DataLoader
+         dataset = TensorDataset(torch.FloatTensor(sequences))
+         test_loader = DataLoader(dataset, batch_size=32, shuffle=False)

+         # Calculate threshold percentile from contamination
+         threshold_percentile = (1 - contamination) * 100
+
+         # Detect anomalies
+         reconstruction_errors, predictions, anomalies, optimal_threshold = (
+             self.detect_anomalies_batch(
+                 self.model, test_loader, threshold_percentile=threshold_percentile
+             )
+         )

          # Create results DataFrame
          anomaly_results = pd.DataFrame(
              {
+                 "date": data.loc[original_index, "date"],
+                 column: data.loc[original_index, column],
+                 "is_anomaly": anomalies,
+                 "reconstruction_error": reconstruction_errors,
+                 "prediction": predictions,
              }
          )

          return anomaly_results

+     def detect_anomalies_batch(self, model, test_loader, threshold_percentile=99.7):
+         """Detect anomalies in batches"""
+         reconstruction_errors = []
+         predictions = []
+
+         with torch.no_grad():
+             for seq_true in test_loader:
+                 x = seq_true[0]  # TensorDataset yields 1-tuples; unwrap the batch
+                 pred = model(x)
+                 # Calculate reconstruction error for each sequence
+                 errors = torch.mean(
+                     torch.abs(pred - x), dim=(1, 2)
+                 )  # Mean over sequence length and features
+                 reconstruction_errors.extend(errors.cpu().numpy())
+                 predictions.extend(
+                     pred[:, -1, 0].cpu().numpy()
+                 )  # Take last timestep prediction
+
+         reconstruction_errors = np.array(reconstruction_errors)
+         predictions = np.array(predictions)
+         optimal_threshold = np.percentile(reconstruction_errors, threshold_percentile)
+         anomalies = (reconstruction_errors > optimal_threshold).astype(int)
+
+         return reconstruction_errors, predictions, anomalies, optimal_threshold
+
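`detect_anomalies_batch` flags the top `contamination` fraction of reconstruction errors: a contamination of 0.05 maps to a 95th-percentile cutoff, and the default `threshold_percentile=99.7` mirrors the classic three-sigma rule. The thresholding step in isolation (a sketch over synthetic errors):

```python
import numpy as np

errors = np.random.default_rng(0).exponential(scale=1.0, size=1000)
contamination = 0.05
threshold = np.percentile(errors, (1 - contamination) * 100)  # 95th percentile
flagged = (errors > threshold).astype(int)
print(f"threshold={threshold:.3f}, flagged={flagged.sum()}")  # roughly 50 of 1000 points
```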
+ @dataclass()
  class AIContextGenerator:
+     predefined_anomalies = {
+         datetime(2014, 11, 2).date(): [
+             {
+                 "type": "NYC Marathon",
+                 "description": "2014 New York City Marathon",
+                 "reference": None,
+             }
+         ],
+         datetime(2014, 11, 27).date(): [
+             {
+                 "type": "Thanksgiving Day",
+                 "description": "Thanksgiving Day",
+                 "reference": None,
+             }
+         ],
+         datetime(2014, 12, 25).date(): [
+             {
+                 "type": "Christmas",
+                 "description": "Christmas",
+                 "reference": {
+                     "text": "NYC Marathon 2014",
+                     "url": "https://en.wikipedia.org/wiki/2014_New_York_City_Marathon",
+                 },
+             }
+         ],
+         datetime(2015, 1, 1).date(): [
+             {
+                 "type": "Event",
+                 "description": "New Year's Day",
+                 "reference": {
+                     "text": "NYC Marathon 2014",
+                     "url": "https://en.wikipedia.org/wiki/2014_New_York_City_Marathon",
+                 },
+             }
+         ],
+         datetime(2015, 1, 26).date(): [
+             {
+                 "type": "Event",
+                 "description": "Snowstorm",
+                 "reference": {
+                     "text": "NYC Marathon 2014",
+                     "url": "https://en.wikipedia.org/wiki/2014_New_York_City_Marathon",
+                 },
+             }
+         ],
+         datetime(2015, 1, 27).date(): [
+             {
+                 "type": "Event",
+                 "description": "Snowstorm",
+                 "reference": {
+                     "text": "NYC Marathon 2014",
+                     "url": "https://en.wikipedia.org/wiki/2014_New_York_City_Marathon",
+                 },
+             }
+         ],
+         datetime(2014, 7, 1).date(): [
+             {
+                 "type": "Event",
+                 "description": "Testing",
+                 "reference": {
+                     "text": "NYC Marathon 2014",
+                     "url": "https://en.wikipedia.org/wiki/2014_New_York_City_Marathon",
+                 },
+             }
+         ],
+     }
+
      def generate_context(self, anomaly_date):
          """
+         Generate potential context for the anomaly if predefined

          :param anomaly_date: Date of the anomaly
+         :return: List of contextual insights if available, else None
          """
+         if isinstance(anomaly_date, pd.Timestamp):
+             anomaly_date = anomaly_date.date()
+
+         return self.predefined_anomalies.get(anomaly_date, None)

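Lookups key on plain `datetime.date` objects, which is why `generate_context` first strips a `pd.Timestamp` down to its date. Usage (a sketch):

```python
import pandas as pd

gen = AIContextGenerator()
print(gen.generate_context(pd.Timestamp("2014-11-27")))  # [{'type': 'Thanksgiving Day', ...}]
print(gen.generate_context(pd.Timestamp("2014-06-01")))  # None: no known event on this date
```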
+ def load_nyc_taxi_data(file_path="data/nyc_taxi_traffic_data.csv"):
      """
+     Load and preprocess NYC Taxi dataset from a CSV file.

+     :return: DataFrame with taxi traffic data
      """
+     # Load the CSV file
+     df = pd.read_csv(file_path)

+     # Ensure timestamp column is datetime and rename columns for consistency
+     df["timestamp"] = pd.to_datetime(df["timestamp"])
+     df.rename(columns={"timestamp": "date", "value": "daily_traffic"}, inplace=True)

+     # Sort by date to ensure proper time-series ordering
+     df = df.sort_values(by="date").reset_index(drop=True)

      return df

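`load_nyc_taxi_data` expects a NAB-style layout with `timestamp` and `value` columns, which it renames to `date` and `daily_traffic`. A sketch of the assumed file shape (the sample rows are illustrative):

```python
# data/nyc_taxi_traffic_data.csv (assumed layout):
#   timestamp,value
#   2014-07-01 00:00:00,10844
#   2014-07-01 00:30:00,8127
df = load_nyc_taxi_data()
print(df.columns.tolist())  # ['date', 'daily_traffic']
```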
  def main():
      st.set_page_config(
+         page_title="NYC Taxi Traffic Anomaly Detection",
+         page_icon="🚕",
+         layout="wide",
+         initial_sidebar_state="expanded",
      )

      st.title("🚕 NYC Taxi Traffic Anomaly Detection")
@@ -180,8 +299,16 @@ def main():
          filtered_data, "daily_traffic", contamination=anomaly_threshold
      )

+     # Get anomaly points for visualization
+     anomaly_points = anomalies[anomalies["is_anomaly"] == 1]
+
+     # Filter true anomalies based on predefined anomalies
+     true_anomaly_points = anomaly_points[
+         anomaly_points["date"].dt.date.isin(AIContextGenerator.predefined_anomalies)
+     ]
+
      # Visualization
+     st.header("Daily Taxi Traffic Trend")
      fig = px.line(
          filtered_data,
          x="date",
@@ -191,7 +318,6 @@ def main():
      )

      # Highlight Anomalies
      fig.add_trace(
          go.Scatter(
              x=anomaly_points["date"],
@@ -205,29 +331,73 @@ def main():
      st.plotly_chart(fig, use_container_width=True)

      # Anomaly Details
+     st.header("Insights of Anomalies with Known Events 📈")

+     # Calculate metrics using the anomalies DataFrame
+     total_anomalies_detected = len(anomaly_points)
+     true_anomalies = len(true_anomaly_points)
+     false_anomalies = total_anomalies_detected - true_anomalies
+
+     st.sidebar.subheader("Summary")
+     st.sidebar.metric("Total Anomalies Detected:", total_anomalies_detected)
+     st.sidebar.metric("Anomalies with Known Events:", true_anomalies)
+     st.sidebar.metric("Unexplained Anomalies:", false_anomalies)
+
+     if not true_anomaly_points.empty:
          context_generator = AIContextGenerator()

+         # Group by date and calculate min/max traffic
+         grouped_anomalies = (
+             true_anomaly_points.groupby(true_anomaly_points["date"].dt.date)
+             .agg({"daily_traffic": ["min", "max"]})
+             .reset_index()
+         )
+
+         # Flatten the multi-level columns
+         grouped_anomalies.columns = ["date", "min_traffic", "max_traffic"]
+
+         for _, anomaly in grouped_anomalies.iterrows():

              col1, col2 = st.columns(2)

              with col1:
+                 st.subheader(f"Anomaly on {anomaly['date']}")
+                 traffic_range = (
+                     f"{anomaly['min_traffic']:.0f}-{anomaly['max_traffic']:.0f}"
+                 )
+                 st.metric("Taxi Rides Range", traffic_range)

              with col2:
                  contexts = context_generator.generate_context(anomaly["date"])
+                 if contexts:
+                     for context in contexts:
+                         st.subheader(f"Event: {context['type']}")
+
+                         reference_text = (
+                             context["reference"]["text"]
+                             if context["reference"]
+                             else "-"
+                         )
+                         reference_url = (
+                             context["reference"]["url"]
+                             if context["reference"] and context["reference"]["url"]
+                             else ""
+                         )
+
+                         url = (
+                             f"[{reference_text}]({reference_url})"
+                             if reference_url
+                             else reference_text
+                         )
+
+                         st.markdown(
+                             f"""
+                             - Description: {context['description']}
+                             - Reference: {url}
+                             """
+                         )
+                 else:
+                     st.write("No significant event available for this anomaly.")
      else:
          st.info("No significant anomalies detected with current settings.")

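Putting the pieces together, a minimal driver for the new detector (a sketch: it assumes the CSV and the `transformer_model_small.pth` checkpoint referenced above are both present):

```python
df = load_nyc_taxi_data()
detector = NYCTaxiAnomalyDetector(df)
results = detector.detect_anomalies(df, "daily_traffic", contamination=0.05)
print(results[results["is_anomaly"] == 1].head())  # dates with the largest reconstruction errors
```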
src/config/config.py ADDED
@@ -0,0 +1,10 @@
+ import os
+ import logging
+ from path_config import LOG_DIR
+
+ def setup_logging():
+     logging.basicConfig(
+         filename=os.path.join(LOG_DIR, 'train.log'),
+         level=logging.INFO,
+         format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
+     )
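Callers only need to invoke `setup_logging()` once at startup; afterwards any module-level logger writes to `train.log` under `LOG_DIR`. Usage (a sketch, assuming `LOG_DIR` already exists on disk):

```python
import logging
from src.config.config import setup_logging

setup_logging()
logging.getLogger(__name__).info("This line ends up in LOG_DIR/train.log")
```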
src/inference.py ADDED
@@ -0,0 +1,25 @@
+ from src.config.config import setup_logging
+ from src.pipeline import NYCDataLoader, VanillaLSTM, Transformer, VAE, AnomalyDetector
+
+
+ def inference():
+
+     seq_length = 48
+
+     setup_logging()
+
+     # Load the preprocessed data
+     data_loader = NYCDataLoader(batch_size=32)
+     train_loader, _, test_loader = data_loader.load_data()
+
+     # Get the true anomalies
+     true_anomalies = data_loader.get_true_anomalies()
+
+     # Initialize the AnomalyDetector
+     detector = AnomalyDetector()
+
+     # Load the trained models
+     detector.load_data(test_loader=test_loader)
+     detector.load_trained_model("transformer_model.pth", model_type="transformer")
+
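`inference()` is defined but nothing in the file invokes it. If the script is meant to be run directly, a standard entry-point guard (not present in the commit) would be:

```python
if __name__ == "__main__":
    inference()
```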
src/logs/train.log ADDED
@@ -0,0 +1,300 @@
+ 2025-01-06 14:24:18,937 - pipeline.preprocesser - INFO - Loading raw data...
+ 2025-01-06 14:24:19,021 - pipeline.preprocesser - INFO - Detecting anomalies...
+ 2025-01-06 14:24:19,021 - pipeline.preprocesser - INFO - Labeling anomalies...
+ 2025-01-06 14:25:05,900 - pipeline.preprocesser - INFO - Loading raw data...
+ 2025-01-06 14:25:05,939 - pipeline.preprocesser - INFO - Detecting anomalies...
+ 2025-01-06 14:25:05,939 - pipeline.preprocesser - INFO - Labeling anomalies...
+ 2025-01-06 14:25:05,952 - pipeline.preprocesser - INFO - Splitting the data into training, validation and testing set...
+ 2025-01-06 14:25:05,954 - pipeline.preprocesser - INFO - Splitting the data into features and target...
+ 2025-01-06 14:25:05,956 - pipeline.preprocesser - INFO - Scaling the data...
+ 2025-01-06 14:25:05,962 - pipeline.preprocesser - INFO - Creating sliding window with the length of 48 from the data...
+ 2025-01-06 14:25:05,970 - pipeline.preprocesser - INFO - Saving the preprocessed data...
+ 2025-01-06 14:25:05,973 - pipeline.preprocesser - INFO - C:\Users\LENOVO\PycharmProjects\NeuroOrion_Time_Series_Anomaly_Detection\data\preprocessed_data\train_features.npy has been saved successfully!
+ 2025-01-06 14:25:05,974 - pipeline.preprocesser - INFO - C:\Users\LENOVO\PycharmProjects\NeuroOrion_Time_Series_Anomaly_Detection\data\preprocessed_data\val_features.npy has been saved successfully!
+ 2025-01-06 14:25:05,975 - pipeline.preprocesser - INFO - C:\Users\LENOVO\PycharmProjects\NeuroOrion_Time_Series_Anomaly_Detection\data\preprocessed_data\test_features.npy has been saved successfully!
+ 2025-01-06 14:25:05,976 - pipeline.preprocesser - INFO - C:\Users\LENOVO\PycharmProjects\NeuroOrion_Time_Series_Anomaly_Detection\data\preprocessed_data\train_labels.npy has been saved successfully!
+ 2025-01-06 14:25:05,977 - pipeline.preprocesser - INFO - C:\Users\LENOVO\PycharmProjects\NeuroOrion_Time_Series_Anomaly_Detection\data\preprocessed_data\val_labels.npy has been saved successfully!
+ 2025-01-06 14:25:05,978 - pipeline.preprocesser - INFO - C:\Users\LENOVO\PycharmProjects\NeuroOrion_Time_Series_Anomaly_Detection\data\preprocessed_data\test_labels.npy has been saved successfully!
+ 2025-01-06 14:30:56,989 - pipeline.preprocesser - INFO - Loading raw data...
+ 2025-01-06 14:30:57,016 - pipeline.preprocesser - INFO - Detecting anomalies...
+ 2025-01-06 14:30:57,016 - pipeline.preprocesser - INFO - Labeling anomalies...
+ 2025-01-06 14:30:57,026 - pipeline.preprocesser - INFO - Splitting the data into training, validation and testing set...
+ 2025-01-06 14:30:57,027 - pipeline.preprocesser - INFO - Splitting the data into features and target...
+ 2025-01-06 14:30:57,028 - pipeline.preprocesser - INFO - Scaling the data...
+ 2025-01-06 14:30:57,031 - pipeline.preprocesser - INFO - Creating sliding window with the length of 48 from the data...
+ 2025-01-06 14:30:57,038 - pipeline.preprocesser - INFO - Saving the preprocessed data...
+ 2025-01-06 14:30:57,040 - pipeline.preprocesser - INFO - C:\Users\LENOVO\PycharmProjects\NeuroOrion_Time_Series_Anomaly_Detection\data\preprocessed_data\train_features.npy has been saved successfully!
+ 2025-01-06 14:30:57,041 - pipeline.preprocesser - INFO - C:\Users\LENOVO\PycharmProjects\NeuroOrion_Time_Series_Anomaly_Detection\data\preprocessed_data\val_features.npy has been saved successfully!
+ 2025-01-06 14:30:57,042 - pipeline.preprocesser - INFO - C:\Users\LENOVO\PycharmProjects\NeuroOrion_Time_Series_Anomaly_Detection\data\preprocessed_data\test_features.npy has been saved successfully!
+ 2025-01-06 14:30:57,043 - pipeline.preprocesser - INFO - C:\Users\LENOVO\PycharmProjects\NeuroOrion_Time_Series_Anomaly_Detection\data\preprocessed_data\train_labels.npy has been saved successfully!
+ 2025-01-06 14:30:57,043 - pipeline.preprocesser - INFO - C:\Users\LENOVO\PycharmProjects\NeuroOrion_Time_Series_Anomaly_Detection\data\preprocessed_data\val_labels.npy has been saved successfully!
+ 2025-01-06 14:30:57,044 - pipeline.preprocesser - INFO - C:\Users\LENOVO\PycharmProjects\NeuroOrion_Time_Series_Anomaly_Detection\data\preprocessed_data\test_labels.npy has been saved successfully!
+ 2025-01-06 14:30:57,122 - pipeline.trainer - INFO - Initialize the model...
+ 2025-01-06 14:31:42,480 - pipeline.preprocesser - INFO - Loading raw data...
+ 2025-01-06 14:31:42,498 - pipeline.preprocesser - INFO - Detecting anomalies...
+ 2025-01-06 14:31:42,498 - pipeline.preprocesser - INFO - Labeling anomalies...
+ 2025-01-06 14:31:42,506 - pipeline.preprocesser - INFO - Splitting the data into training, validation and testing set...
+ 2025-01-06 14:31:42,508 - pipeline.preprocesser - INFO - Splitting the data into features and target...
+ 2025-01-06 14:31:42,509 - pipeline.preprocesser - INFO - Scaling the data...
+ 2025-01-06 14:31:42,512 - pipeline.preprocesser - INFO - Creating sliding window with the length of 48 from the data...
+ 2025-01-06 14:31:42,518 - pipeline.preprocesser - INFO - Saving the preprocessed data...
+ 2025-01-06 14:31:42,519 - pipeline.preprocesser - INFO - C:\Users\LENOVO\PycharmProjects\NeuroOrion_Time_Series_Anomaly_Detection\data\preprocessed_data\train_features.npy has been saved successfully!
+ 2025-01-06 14:31:42,521 - pipeline.preprocesser - INFO - C:\Users\LENOVO\PycharmProjects\NeuroOrion_Time_Series_Anomaly_Detection\data\preprocessed_data\val_features.npy has been saved successfully!
+ 2025-01-06 14:31:42,523 - pipeline.preprocesser - INFO - C:\Users\LENOVO\PycharmProjects\NeuroOrion_Time_Series_Anomaly_Detection\data\preprocessed_data\test_features.npy has been saved successfully!
+ 2025-01-06 14:31:42,524 - pipeline.preprocesser - INFO - C:\Users\LENOVO\PycharmProjects\NeuroOrion_Time_Series_Anomaly_Detection\data\preprocessed_data\train_labels.npy has been saved successfully!
+ 2025-01-06 14:31:42,525 - pipeline.preprocesser - INFO - C:\Users\LENOVO\PycharmProjects\NeuroOrion_Time_Series_Anomaly_Detection\data\preprocessed_data\val_labels.npy has been saved successfully!
+ 2025-01-06 14:31:42,525 - pipeline.preprocesser - INFO - C:\Users\LENOVO\PycharmProjects\NeuroOrion_Time_Series_Anomaly_Detection\data\preprocessed_data\test_labels.npy has been saved successfully!
+ 2025-01-06 14:31:42,584 - pipeline.trainer - INFO - Initialize the model...
+ 2025-01-06 14:32:10,258 - pipeline.preprocesser - INFO - Loading raw data...
+ 2025-01-06 14:32:10,275 - pipeline.preprocesser - INFO - Detecting anomalies...
+ 2025-01-06 14:32:10,275 - pipeline.preprocesser - INFO - Labeling anomalies...
+ 2025-01-06 14:32:10,284 - pipeline.preprocesser - INFO - Splitting the data into training, validation and testing set...
+ 2025-01-06 14:32:10,285 - pipeline.preprocesser - INFO - Splitting the data into features and target...
+ 2025-01-06 14:32:10,286 - pipeline.preprocesser - INFO - Scaling the data...
+ 2025-01-06 14:32:10,291 - pipeline.preprocesser - INFO - Creating sliding window with the length of 48 from the data...
+ 2025-01-06 14:32:10,297 - pipeline.preprocesser - INFO - Saving the preprocessed data...
+ 2025-01-06 14:32:10,307 - pipeline.preprocesser - INFO - C:\Users\LENOVO\PycharmProjects\NeuroOrion_Time_Series_Anomaly_Detection\data\preprocessed_data\train_features.npy has been saved successfully!
+ 2025-01-06 14:32:10,310 - pipeline.preprocesser - INFO - C:\Users\LENOVO\PycharmProjects\NeuroOrion_Time_Series_Anomaly_Detection\data\preprocessed_data\val_features.npy has been saved successfully!
+ 2025-01-06 14:32:10,314 - pipeline.preprocesser - INFO - C:\Users\LENOVO\PycharmProjects\NeuroOrion_Time_Series_Anomaly_Detection\data\preprocessed_data\test_features.npy has been saved successfully!
+ 2025-01-06 14:32:10,316 - pipeline.preprocesser - INFO - C:\Users\LENOVO\PycharmProjects\NeuroOrion_Time_Series_Anomaly_Detection\data\preprocessed_data\train_labels.npy has been saved successfully!
+ 2025-01-06 14:32:10,318 - pipeline.preprocesser - INFO - C:\Users\LENOVO\PycharmProjects\NeuroOrion_Time_Series_Anomaly_Detection\data\preprocessed_data\val_labels.npy has been saved successfully!
+ 2025-01-06 14:32:10,319 - pipeline.preprocesser - INFO - C:\Users\LENOVO\PycharmProjects\NeuroOrion_Time_Series_Anomaly_Detection\data\preprocessed_data\test_labels.npy has been saved successfully!
+ 2025-01-06 14:32:10,393 - pipeline.trainer - INFO - Initialize the model...
+ 2025-01-06 14:33:33,454 - pipeline.preprocesser - INFO - Loading raw data...
+ 2025-01-06 14:33:33,473 - pipeline.preprocesser - INFO - Detecting anomalies...
+ 2025-01-06 14:33:33,473 - pipeline.preprocesser - INFO - Labeling anomalies...
+ 2025-01-06 14:33:33,482 - pipeline.preprocesser - INFO - Splitting the data into training, validation and testing set...
+ 2025-01-06 14:33:33,482 - pipeline.preprocesser - INFO - Splitting the data into features and target...
+ 2025-01-06 14:33:33,483 - pipeline.preprocesser - INFO - Scaling the data...
+ 2025-01-06 14:33:33,488 - pipeline.preprocesser - INFO - Creating sliding window with the length of 48 from the data...
+ 2025-01-06 14:33:33,494 - pipeline.preprocesser - INFO - Saving the preprocessed data...
+ 2025-01-06 14:33:33,498 - pipeline.preprocesser - INFO - C:\Users\LENOVO\PycharmProjects\NeuroOrion_Time_Series_Anomaly_Detection\data\preprocessed_data\train_features.npy has been saved successfully!
+ 2025-01-06 14:33:33,499 - pipeline.preprocesser - INFO - C:\Users\LENOVO\PycharmProjects\NeuroOrion_Time_Series_Anomaly_Detection\data\preprocessed_data\val_features.npy has been saved successfully!
+ 2025-01-06 14:33:33,501 - pipeline.preprocesser - INFO - C:\Users\LENOVO\PycharmProjects\NeuroOrion_Time_Series_Anomaly_Detection\data\preprocessed_data\test_features.npy has been saved successfully!
+ 2025-01-06 14:33:33,501 - pipeline.preprocesser - INFO - C:\Users\LENOVO\PycharmProjects\NeuroOrion_Time_Series_Anomaly_Detection\data\preprocessed_data\train_labels.npy has been saved successfully!
+ 2025-01-06 14:33:33,502 - pipeline.preprocesser - INFO - C:\Users\LENOVO\PycharmProjects\NeuroOrion_Time_Series_Anomaly_Detection\data\preprocessed_data\val_labels.npy has been saved successfully!
+ 2025-01-06 14:33:33,502 - pipeline.preprocesser - INFO - C:\Users\LENOVO\PycharmProjects\NeuroOrion_Time_Series_Anomaly_Detection\data\preprocessed_data\test_labels.npy has been saved successfully!
+ 2025-01-06 14:33:53,220 - pipeline.preprocesser - INFO - Loading raw data...
+ 2025-01-06 14:33:53,240 - pipeline.preprocesser - INFO - Detecting anomalies...
+ 2025-01-06 14:33:53,240 - pipeline.preprocesser - INFO - Labeling anomalies...
+ 2025-01-06 14:33:53,247 - pipeline.preprocesser - INFO - Splitting the data into training, validation and testing set...
+ 2025-01-06 14:33:53,248 - pipeline.preprocesser - INFO - Splitting the data into features and target...
+ 2025-01-06 14:33:53,249 - pipeline.preprocesser - INFO - Scaling the data...
+ 2025-01-06 14:33:53,253 - pipeline.preprocesser - INFO - Creating sliding window with the length of 48 from the data...
+ 2025-01-06 14:33:53,260 - pipeline.preprocesser - INFO - Saving the preprocessed data...
+ 2025-01-06 14:33:53,261 - pipeline.preprocesser - INFO - C:\Users\LENOVO\PycharmProjects\NeuroOrion_Time_Series_Anomaly_Detection\data\preprocessed_data\train_features.npy has been saved successfully!
+ 2025-01-06 14:33:53,263 - pipeline.preprocesser - INFO - C:\Users\LENOVO\PycharmProjects\NeuroOrion_Time_Series_Anomaly_Detection\data\preprocessed_data\val_features.npy has been saved successfully!
+ 2025-01-06 14:33:53,264 - pipeline.preprocesser - INFO - C:\Users\LENOVO\PycharmProjects\NeuroOrion_Time_Series_Anomaly_Detection\data\preprocessed_data\test_features.npy has been saved successfully!
+ 2025-01-06 14:33:53,265 - pipeline.preprocesser - INFO - C:\Users\LENOVO\PycharmProjects\NeuroOrion_Time_Series_Anomaly_Detection\data\preprocessed_data\train_labels.npy has been saved successfully!
+ 2025-01-06 14:33:53,266 - pipeline.preprocesser - INFO - C:\Users\LENOVO\PycharmProjects\NeuroOrion_Time_Series_Anomaly_Detection\data\preprocessed_data\val_labels.npy has been saved successfully!
+ 2025-01-06 14:33:53,267 - pipeline.preprocesser - INFO - C:\Users\LENOVO\PycharmProjects\NeuroOrion_Time_Series_Anomaly_Detection\data\preprocessed_data\test_labels.npy has been saved successfully!
+ 2025-01-06 14:33:53,320 - pipeline.trainer - INFO - Initialize the model...
+ 2025-01-06 14:33:53,320 - pipeline.trainer - INFO - Configure the training parameters...
+ 2025-01-06 14:33:55,517 - pipeline.trainer - INFO - Start training...
+ 2025-01-06 14:33:56,930 - pipeline.trainer - INFO - Epoch 1/20, Train Loss: 0.3034, Val Loss: 0.0795
+ 2025-01-06 14:33:58,164 - pipeline.trainer - INFO - Epoch 2/20, Train Loss: 0.0653, Val Loss: 0.0455
+ 2025-01-06 14:33:59,489 - pipeline.trainer - INFO - Epoch 3/20, Train Loss: 0.0439, Val Loss: 0.0350
+ 2025-01-06 14:34:00,754 - pipeline.trainer - INFO - Epoch 4/20, Train Loss: 0.0357, Val Loss: 0.0351
+ 2025-01-06 14:34:02,119 - pipeline.trainer - INFO - Epoch 5/20, Train Loss: 0.0337, Val Loss: 0.0251
+ 2025-01-06 14:34:03,447 - pipeline.trainer - INFO - Epoch 6/20, Train Loss: 0.0293, Val Loss: 0.0248
+ 2025-01-06 14:34:04,771 - pipeline.trainer - INFO - Epoch 7/20, Train Loss: 0.0268, Val Loss: 0.0235
+ 2025-01-06 14:34:06,231 - pipeline.trainer - INFO - Epoch 8/20, Train Loss: 0.0273, Val Loss: 0.0215
+ 2025-01-06 14:34:07,567 - pipeline.trainer - INFO - Epoch 9/20, Train Loss: 0.0244, Val Loss: 0.0182
+ 2025-01-06 14:34:08,960 - pipeline.trainer - INFO - Epoch 10/20, Train Loss: 0.0242, Val Loss: 0.0190
+ 2025-01-06 14:34:10,369 - pipeline.trainer - INFO - Epoch 11/20, Train Loss: 0.0234, Val Loss: 0.0340
+ 2025-01-06 14:34:11,813 - pipeline.trainer - INFO - Epoch 12/20, Train Loss: 0.0214, Val Loss: 0.0185
+ 2025-01-06 14:34:13,487 - pipeline.trainer - INFO - Epoch 13/20, Train Loss: 0.0204, Val Loss: 0.0165
+ 2025-01-06 14:34:15,658 - pipeline.trainer - INFO - Epoch 14/20, Train Loss: 0.0202, Val Loss: 0.0166
+ 2025-01-06 14:34:17,142 - pipeline.trainer - INFO - Epoch 15/20, Train Loss: 0.0194, Val Loss: 0.0156
+ 2025-01-06 14:34:18,893 - pipeline.trainer - INFO - Epoch 16/20, Train Loss: 0.0184, Val Loss: 0.0183
+ 2025-01-06 14:34:20,405 - pipeline.trainer - INFO - Epoch 17/20, Train Loss: 0.0181, Val Loss: 0.0166
+ 2025-01-06 14:34:21,905 - pipeline.trainer - INFO - Epoch 18/20, Train Loss: 0.0177, Val Loss: 0.0164
+ 2025-01-06 14:34:23,342 - pipeline.trainer - INFO - Epoch 19/20, Train Loss: 0.0173, Val Loss: 0.0163
+ 2025-01-06 14:34:24,784 - pipeline.trainer - INFO - Epoch 20/20, Train Loss: 0.0169, Val Loss: 0.0137
+ 2025-01-06 14:34:24,784 - pipeline.trainer - INFO - Training completed!
+ 2025-01-06 14:34:24,785 - pipeline.trainer - INFO - Initialize the model...
+ 2025-01-06 14:34:24,786 - pipeline.trainer - INFO - Configure the training parameters...
+ 2025-01-06 14:34:24,786 - pipeline.trainer - INFO - Start training...
+ 2025-01-06 14:34:26,589 - pipeline.trainer - INFO - Epoch 1/20, Train Loss: 1.0037, Val Loss: 1.1303
+ 2025-01-06 14:34:28,275 - pipeline.trainer - INFO - Epoch 2/20, Train Loss: 0.9997, Val Loss: 1.1426
+ 2025-01-06 14:34:30,034 - pipeline.trainer - INFO - Epoch 3/20, Train Loss: 0.9988, Val Loss: 1.1378
+ 2025-01-06 14:34:31,756 - pipeline.trainer - INFO - Epoch 4/20, Train Loss: 0.9897, Val Loss: 1.0285
+ 2025-01-06 14:34:33,542 - pipeline.trainer - INFO - Epoch 5/20, Train Loss: 0.8906, Val Loss: 0.9805
+ 2025-01-06 14:34:35,897 - pipeline.trainer - INFO - Epoch 6/20, Train Loss: 0.8219, Val Loss: 0.8182
+ 2025-01-06 14:34:37,835 - pipeline.trainer - INFO - Epoch 7/20, Train Loss: 0.7519, Val Loss: 0.7566
+ 2025-01-06 14:34:39,565 - pipeline.trainer - INFO - Epoch 8/20, Train Loss: 0.7407, Val Loss: 0.7998
+ 2025-01-06 14:34:41,248 - pipeline.trainer - INFO - Epoch 9/20, Train Loss: 0.7306, Val Loss: 0.7671
+ 2025-01-06 14:34:43,010 - pipeline.trainer - INFO - Epoch 10/20, Train Loss: 0.7294, Val Loss: 0.7305
+ 2025-01-06 14:34:44,642 - pipeline.trainer - INFO - Epoch 11/20, Train Loss: 0.7223, Val Loss: 0.7628
+ 2025-01-06 14:34:46,323 - pipeline.trainer - INFO - Epoch 12/20, Train Loss: 0.7174, Val Loss: 0.7732
+ 2025-01-06 14:34:48,097 - pipeline.trainer - INFO - Epoch 13/20, Train Loss: 0.7136, Val Loss: 0.7379
+ 2025-01-06 14:34:49,784 - pipeline.trainer - INFO - Epoch 14/20, Train Loss: 0.7142, Val Loss: 0.7372
+ 2025-01-06 14:34:51,476 - pipeline.trainer - INFO - Epoch 15/20, Train Loss: 0.7131, Val Loss: 0.7190
+ 2025-01-06 14:34:53,172 - pipeline.trainer - INFO - Epoch 16/20, Train Loss: 0.7203, Val Loss: 0.7440
+ 2025-01-06 14:34:54,822 - pipeline.trainer - INFO - Epoch 17/20, Train Loss: 0.7083, Val Loss: 0.7466
+ 2025-01-06 14:34:56,503 - pipeline.trainer - INFO - Epoch 18/20, Train Loss: 0.7139, Val Loss: 0.7061
+ 2025-01-06 14:34:58,186 - pipeline.trainer - INFO - Epoch 19/20, Train Loss: 0.7102, Val Loss: 0.7272
+ 2025-01-06 14:34:59,852 - pipeline.trainer - INFO - Epoch 20/20, Train Loss: 0.7139, Val Loss: 0.7414
+ 2025-01-06 14:34:59,852 - pipeline.trainer - INFO - Training completed!
+ 2025-01-06 14:34:59,871 - pipeline.trainer - INFO - Initialize the model...
+ 2025-01-06 14:34:59,871 - pipeline.trainer - INFO - Configure the training parameters...
+ 2025-01-06 14:34:59,872 - pipeline.trainer - INFO - Start training...
+ 2025-01-06 14:35:08,324 - pipeline.trainer - INFO - Epoch 1/5, Train Loss: 0.0648, Val Loss: 0.0018
+ 2025-01-06 14:35:15,897 - pipeline.trainer - INFO - Epoch 2/5, Train Loss: 0.0059, Val Loss: 0.0024
+ 2025-01-06 14:35:23,713 - pipeline.trainer - INFO - Epoch 3/5, Train Loss: 0.0032, Val Loss: 0.0012
+ 2025-01-06 14:35:31,344 - pipeline.trainer - INFO - Epoch 4/5, Train Loss: 0.0022, Val Loss: 0.0005
+ 2025-01-06 14:35:39,079 - pipeline.trainer - INFO - Epoch 5/5, Train Loss: 0.0016, Val Loss: 0.0002
+ 2025-01-06 14:35:39,079 - pipeline.trainer - INFO - Training completed!
+ 2025-01-06 19:00:28,816 - pipeline.preprocesser - INFO - Loading raw data...
+ 2025-01-06 19:00:28,853 - pipeline.preprocesser - INFO - Detecting anomalies...
+ 2025-01-06 19:00:28,853 - pipeline.preprocesser - INFO - Labeling anomalies...
+ 2025-01-06 19:00:28,876 - pipeline.preprocesser - INFO - Splitting the data into training, validation and testing set...
+ 2025-01-06 19:00:28,879 - pipeline.preprocesser - INFO - Splitting the data into features and target...
+ 2025-01-06 19:00:28,881 - pipeline.preprocesser - INFO - Scaling the data...
+ 2025-01-06 19:00:28,889 - pipeline.preprocesser - INFO - Creating sliding window with the length of 24 from the data...
+ 2025-01-06 19:00:28,896 - pipeline.preprocesser - INFO - Saving the preprocessed data...
+ 2025-01-06 19:00:28,898 - pipeline.preprocesser - INFO - C:\Users\LENOVO\PycharmProjects\NeuroOrion_Time_Series_Anomaly_Detection\data\preprocessed_data\train_features.npy has been saved successfully!
+ 2025-01-06 19:00:28,899 - pipeline.preprocesser - INFO - C:\Users\LENOVO\PycharmProjects\NeuroOrion_Time_Series_Anomaly_Detection\data\preprocessed_data\val_features.npy has been saved successfully!
+ 2025-01-06 19:00:28,900 - pipeline.preprocesser - INFO - C:\Users\LENOVO\PycharmProjects\NeuroOrion_Time_Series_Anomaly_Detection\data\preprocessed_data\test_features.npy has been saved successfully!
+ 2025-01-06 19:00:28,901 - pipeline.preprocesser - INFO - C:\Users\LENOVO\PycharmProjects\NeuroOrion_Time_Series_Anomaly_Detection\data\preprocessed_data\train_labels.npy has been saved successfully!
+ 2025-01-06 19:00:28,902 - pipeline.preprocesser - INFO - C:\Users\LENOVO\PycharmProjects\NeuroOrion_Time_Series_Anomaly_Detection\data\preprocessed_data\val_labels.npy has been saved successfully!
+ 2025-01-06 19:00:28,903 - pipeline.preprocesser - INFO - C:\Users\LENOVO\PycharmProjects\NeuroOrion_Time_Series_Anomaly_Detection\data\preprocessed_data\test_labels.npy has been saved successfully!
+ 2025-01-06 19:00:28,986 - pipeline.trainer - INFO - Initialize the model...
+ 2025-01-06 19:00:28,987 - pipeline.trainer - INFO - Configure the training parameters...
+ 2025-01-06 19:00:31,247 - pipeline.trainer - INFO - Start training...
+ 2025-01-06 19:00:32,247 - pipeline.trainer - INFO - Epoch 1/20, Train Loss: 0.2837, Val Loss: 0.0671
+ 2025-01-06 19:00:33,049 - pipeline.trainer - INFO - Epoch 2/20, Train Loss: 0.0577, Val Loss: 0.0383
+ 2025-01-06 19:00:34,549 - pipeline.trainer - INFO - Epoch 3/20, Train Loss: 0.0440, Val Loss: 0.0301
+ 2025-01-06 19:00:35,469 - pipeline.trainer - INFO - Epoch 4/20, Train Loss: 0.0352, Val Loss: 0.0342
+ 2025-01-06 19:00:36,504 - pipeline.trainer - INFO - Epoch 5/20, Train Loss: 0.0309, Val Loss: 0.0212
+ 2025-01-06 19:00:37,677 - pipeline.trainer - INFO - Epoch 6/20, Train Loss: 0.0281, Val Loss: 0.0234
+ 2025-01-06 19:00:39,663 - pipeline.trainer - INFO - Epoch 7/20, Train Loss: 0.0254, Val Loss: 0.0234
+ 2025-01-06 19:00:41,030 - pipeline.trainer - INFO - Epoch 8/20, Train Loss: 0.0242, Val Loss: 0.0203
+ 2025-01-06 19:00:42,704 - pipeline.trainer - INFO - Epoch 9/20, Train Loss: 0.0233, Val Loss: 0.0263
+ 2025-01-06 19:00:44,501 - pipeline.trainer - INFO - Epoch 10/20, Train Loss: 0.0226, Val Loss: 0.0194
+ 2025-01-06 19:00:45,647 - pipeline.trainer - INFO - Epoch 11/20, Train Loss: 0.0215, Val Loss: 0.0191
+ 2025-01-06 19:00:47,411 - pipeline.trainer - INFO - Epoch 12/20, Train Loss: 0.0223, Val Loss: 0.0179
+ 2025-01-06 19:00:48,595 - pipeline.trainer - INFO - Epoch 13/20, Train Loss: 0.0214, Val Loss: 0.0223
+ 2025-01-06 19:00:49,564 - pipeline.trainer - INFO - Epoch 14/20, Train Loss: 0.0200, Val Loss: 0.0178
+ 2025-01-06 19:00:50,591 - pipeline.trainer - INFO - Epoch 15/20, Train Loss: 0.0193, Val Loss: 0.0186
+ 2025-01-06 19:00:51,548 - pipeline.trainer - INFO - Epoch 16/20, Train Loss: 0.0187, Val Loss: 0.0172
+ 2025-01-06 19:00:52,543 - pipeline.trainer - INFO - Epoch 17/20, Train Loss: 0.0184, Val Loss: 0.0159
+ 2025-01-06 19:00:53,596 - pipeline.trainer - INFO - Epoch 18/20, Train Loss: 0.0178, Val Loss: 0.0173
+ 2025-01-06 19:00:54,580 - pipeline.trainer - INFO - Epoch 19/20, Train Loss: 0.0175, Val Loss: 0.0154
+ 2025-01-06 19:00:55,473 - pipeline.trainer - INFO - Epoch 20/20, Train Loss: 0.0169, Val Loss: 0.0169
+ 2025-01-06 19:00:55,473 - pipeline.trainer - INFO - Training completed!
+ 2025-01-06 19:00:55,475 - pipeline.trainer - INFO - Initialize the model...
+ 2025-01-06 19:00:55,476 - pipeline.trainer - INFO - Configure the training parameters...
+ 2025-01-06 19:00:55,477 - pipeline.trainer - INFO - Start training...
+ 2025-01-06 19:02:52,646 - pipeline.preprocesser - INFO - Loading raw data...
+ 2025-01-06 19:02:52,671 - pipeline.preprocesser - INFO - Detecting anomalies...
+ 2025-01-06 19:02:52,671 - pipeline.preprocesser - INFO - Labeling anomalies...
+ 2025-01-06 19:02:52,680 - pipeline.preprocesser - INFO - Splitting the data into training, validation and testing set...
+ 2025-01-06 19:02:52,681 - pipeline.preprocesser - INFO - Splitting the data into features and target...
+ 2025-01-06 19:02:52,682 - pipeline.preprocesser - INFO - Scaling the data...
+ 2025-01-06 19:02:52,686 - pipeline.preprocesser - INFO - Creating sliding window with the length of 24 from the data...
+ 2025-01-06 19:02:52,693 - pipeline.preprocesser - INFO - Saving the preprocessed data...
+ 2025-01-06 19:02:52,694 - pipeline.preprocesser - INFO - C:\Users\LENOVO\PycharmProjects\NeuroOrion_Time_Series_Anomaly_Detection\data\preprocessed_data\train_features.npy has been saved successfully!
+ 2025-01-06 19:02:52,695 - pipeline.preprocesser - INFO - C:\Users\LENOVO\PycharmProjects\NeuroOrion_Time_Series_Anomaly_Detection\data\preprocessed_data\val_features.npy has been saved successfully!
+ 2025-01-06 19:02:52,696 - pipeline.preprocesser - INFO - C:\Users\LENOVO\PycharmProjects\NeuroOrion_Time_Series_Anomaly_Detection\data\preprocessed_data\test_features.npy has been saved successfully!
+ 2025-01-06 19:02:52,697 - pipeline.preprocesser - INFO - C:\Users\LENOVO\PycharmProjects\NeuroOrion_Time_Series_Anomaly_Detection\data\preprocessed_data\train_labels.npy has been saved successfully!
+ 2025-01-06 19:02:52,697 - pipeline.preprocesser - INFO - C:\Users\LENOVO\PycharmProjects\NeuroOrion_Time_Series_Anomaly_Detection\data\preprocessed_data\val_labels.npy has been saved successfully!
+ 2025-01-06 19:02:52,698 - pipeline.preprocesser - INFO - C:\Users\LENOVO\PycharmProjects\NeuroOrion_Time_Series_Anomaly_Detection\data\preprocessed_data\test_labels.npy has been saved successfully!
+ 2025-01-06 19:02:52,782 - pipeline.trainer - INFO - Initialize the model...
+ 2025-01-06 19:02:52,784 - pipeline.trainer - INFO - Configure the training parameters...
+ 2025-01-06 19:02:54,413 - pipeline.trainer - INFO - Start training...
+ 2025-01-06 19:02:55,398 - pipeline.trainer - INFO - Epoch 1/20, Train Loss: 0.2684, Val Loss: 0.0641
+ 2025-01-06 19:02:56,284 - pipeline.trainer - INFO - Epoch 2/20, Train Loss: 0.0561, Val Loss: 0.0374
+ 2025-01-06 19:02:57,174 - pipeline.trainer - INFO - Epoch 3/20, Train Loss: 0.0403, Val Loss: 0.0275
+ 2025-01-06 19:02:58,048 - pipeline.trainer - INFO - Epoch 4/20, Train Loss: 0.0344, Val Loss: 0.0240
+ 2025-01-06 19:02:59,059 - pipeline.trainer - INFO - Epoch 5/20, Train Loss: 0.0273, Val Loss: 0.0220
+ 2025-01-06 19:03:00,078 - pipeline.trainer - INFO - Epoch 6/20, Train Loss: 0.0267, Val Loss: 0.0213
+ 2025-01-06 19:03:01,054 - pipeline.trainer - INFO - Epoch 7/20, Train Loss: 0.0242, Val Loss: 0.0194
+ 2025-01-06 19:03:02,053 - pipeline.trainer - INFO - Epoch 8/20, Train Loss: 0.0240, Val Loss: 0.0184
+ 2025-01-06 19:03:02,954 - pipeline.trainer - INFO - Epoch 9/20, Train Loss: 0.0222, Val Loss: 0.0183
+ 2025-01-06 19:03:03,976 - pipeline.trainer - INFO - Epoch 10/20, Train Loss: 0.0218, Val Loss: 0.0200
+ 2025-01-06 19:03:04,897 - pipeline.trainer - INFO - Epoch 11/20, Train Loss: 0.0211, Val Loss: 0.0181
+ 2025-01-06 19:03:05,807 - pipeline.trainer - INFO - Epoch 12/20, Train Loss: 0.0197, Val Loss: 0.0171
+ 2025-01-06 19:03:07,057 - pipeline.trainer - INFO - Epoch 13/20, Train Loss: 0.0197, Val Loss: 0.0185
+ 2025-01-06 19:03:08,468 - pipeline.trainer - INFO - Epoch 14/20, Train Loss: 0.0191, Val Loss: 0.0181
+ 2025-01-06 19:03:09,581 - pipeline.trainer - INFO - Epoch 15/20, Train Loss: 0.0188, Val Loss: 0.0186
+ 2025-01-06 19:03:10,485 - pipeline.trainer - INFO - Epoch 16/20, Train Loss: 0.0185, Val Loss: 0.0185
+ 2025-01-06 19:03:11,373 - pipeline.trainer - INFO - Epoch 17/20, Train Loss: 0.0181, Val Loss: 0.0164
+ 2025-01-06 19:03:12,261 - pipeline.trainer - INFO - Epoch 18/20, Train Loss: 0.0181, Val Loss: 0.0179
+ 2025-01-06 19:03:13,120 - pipeline.trainer - INFO - Epoch 19/20, Train Loss: 0.0170, Val Loss: 0.0153
+ 2025-01-06 19:03:14,041 - pipeline.trainer - INFO - Epoch 20/20, Train Loss: 0.0166, Val Loss: 0.0141
+ 2025-01-06 19:03:14,041 - pipeline.trainer - INFO - Training completed!
+ 2025-01-06 19:03:29,028 - pipeline.preprocesser - INFO - Loading raw data...
+ 2025-01-06 19:03:29,058 - pipeline.preprocesser - INFO - Detecting anomalies...
+ 2025-01-06 19:03:29,058 - pipeline.preprocesser - INFO - Labeling anomalies...
+ 2025-01-06 19:03:29,069 - pipeline.preprocesser - INFO - Splitting the data into training, validation and testing set...
+ 2025-01-06 19:03:29,070 - pipeline.preprocesser - INFO - Splitting the data into features and target...
+ 2025-01-06 19:03:29,072 - pipeline.preprocesser - INFO - Scaling the data...
+ 2025-01-06 19:03:29,077 - pipeline.preprocesser - INFO - Creating sliding window with the length of 24 from the data...
+ 2025-01-06 19:03:29,087 - pipeline.preprocesser - INFO - Saving the preprocessed data...
+ 2025-01-06 19:03:29,088 - pipeline.preprocesser - INFO - C:\Users\LENOVO\PycharmProjects\NeuroOrion_Time_Series_Anomaly_Detection\data\preprocessed_data\train_features.npy has been saved successfully!
+ 2025-01-06 19:03:29,089 - pipeline.preprocesser - INFO - C:\Users\LENOVO\PycharmProjects\NeuroOrion_Time_Series_Anomaly_Detection\data\preprocessed_data\val_features.npy has been saved successfully!
+ 2025-01-06 19:03:29,092 - pipeline.preprocesser - INFO - C:\Users\LENOVO\PycharmProjects\NeuroOrion_Time_Series_Anomaly_Detection\data\preprocessed_data\test_features.npy has been saved successfully!
+ 2025-01-06 19:03:29,093 - pipeline.preprocesser - INFO - C:\Users\LENOVO\PycharmProjects\NeuroOrion_Time_Series_Anomaly_Detection\data\preprocessed_data\train_labels.npy has been saved successfully!
+ 2025-01-06 19:03:29,093 - pipeline.preprocesser - INFO - C:\Users\LENOVO\PycharmProjects\NeuroOrion_Time_Series_Anomaly_Detection\data\preprocessed_data\val_labels.npy has been saved successfully!
+ 2025-01-06 19:03:29,094 - pipeline.preprocesser - INFO - C:\Users\LENOVO\PycharmProjects\NeuroOrion_Time_Series_Anomaly_Detection\data\preprocessed_data\test_labels.npy has been saved successfully!
+ 2025-01-06 19:03:29,156 - pipeline.trainer - INFO - Initialize the model...
+ 2025-01-06 19:03:29,157 - pipeline.trainer - INFO - Configure the training parameters...
+ 2025-01-06 19:03:30,542 - pipeline.trainer - INFO - Start training...
+ 2025-01-06 19:03:31,393 - pipeline.trainer - INFO - Epoch 1/20, Train Loss: 0.2989, Val Loss: 0.0746
+ 2025-01-06 19:03:32,272 - pipeline.trainer - INFO - Epoch 2/20, Train Loss: 0.0619, Val Loss: 0.0432
+ 2025-01-06 19:03:33,148 - pipeline.trainer - INFO - Epoch 3/20, Train Loss: 0.0434, Val Loss: 0.0294
+ 2025-01-06 19:03:34,056 - pipeline.trainer - INFO - Epoch 4/20, Train Loss: 0.0372, Val Loss: 0.0289
+ 2025-01-06 19:03:34,932 - pipeline.trainer - INFO - Epoch 5/20, Train Loss: 0.0334, Val Loss: 0.0241
+ 2025-01-06 19:03:35,838 - pipeline.trainer - INFO - Epoch 6/20, Train Loss: 0.0306, Val Loss: 0.0254
+ 2025-01-06 19:03:36,718 - pipeline.trainer - INFO - Epoch 7/20, Train Loss: 0.0296, Val Loss: 0.0218
+ 2025-01-06 19:03:37,602 - pipeline.trainer - INFO - Epoch 8/20, Train Loss: 0.0263, Val Loss: 0.0228
+ 2025-01-06 19:03:38,475 - pipeline.trainer - INFO - Epoch 9/20, Train Loss: 0.0253, Val Loss: 0.0266
+ 2025-01-06 19:03:39,344 - pipeline.trainer - INFO - Epoch 10/20, Train Loss: 0.0247, Val Loss: 0.0200
+ 2025-01-06 19:03:40,207 - pipeline.trainer - INFO - Epoch 11/20, Train Loss: 0.0234, Val Loss: 0.0206
+ 2025-01-06 19:03:41,117 - pipeline.trainer - INFO - Epoch 12/20, Train Loss: 0.0224, Val Loss: 0.0198
+ 2025-01-06 19:03:41,980 - pipeline.trainer - INFO - Epoch 13/20, Train Loss: 0.0220, Val Loss: 0.0188
+ 2025-01-06 19:03:42,848 - pipeline.trainer - INFO - Epoch 14/20, Train Loss: 0.0215, Val Loss: 0.0194
+ 2025-01-06 19:03:43,749 - pipeline.trainer - INFO - Epoch 15/20, Train Loss: 0.0210, Val Loss: 0.0217
+ 2025-01-06 19:03:44,659 - pipeline.trainer - INFO - Epoch 16/20, Train Loss: 0.0204, Val Loss: 0.0208
+ 2025-01-06 19:03:45,532 - pipeline.trainer - INFO - Epoch 17/20, Train Loss: 0.0207, Val Loss: 0.0208
+ 2025-01-06 19:03:46,410 - pipeline.trainer - INFO - Epoch 18/20, Train Loss: 0.0204, Val Loss: 0.0252
+ 2025-01-06 19:03:47,283 - pipeline.trainer - INFO - Epoch 19/20, Train Loss: 0.0204, Val Loss: 0.0193
+ 2025-01-06 19:03:48,166 - pipeline.trainer - INFO - Epoch 20/20, Train Loss: 0.0191, Val Loss: 0.0184
+ 2025-01-06 19:03:48,166 - pipeline.trainer - INFO - Training completed!
+ 2025-01-06 19:03:48,168 - pipeline.trainer - INFO - Initialize the model...
+ 2025-01-06 19:03:48,168 - pipeline.trainer - INFO - Configure the training parameters...
+ 2025-01-06 19:03:48,169 - pipeline.trainer - INFO - Start training...
+ 2025-01-06 19:03:49,381 - pipeline.trainer - INFO - Epoch 1/20, Train Loss: 1.0015, Val Loss: 1.0956
+ 2025-01-06 19:03:50,607 - pipeline.trainer - INFO - Epoch 2/20, Train Loss: 0.8309, Val Loss: 0.8594
+ 2025-01-06 19:03:52,089 - pipeline.trainer - INFO - Epoch 3/20, Train Loss: 0.7853, Val Loss: 0.8332
+ 2025-01-06 19:03:53,445 - pipeline.trainer - INFO - Epoch 4/20, Train Loss: 0.7632, Val Loss: 0.8482
+ 2025-01-06 19:03:54,595 - pipeline.trainer - INFO - Epoch 5/20, Train Loss: 0.7387, Val Loss: 0.7666
+ 2025-01-06 19:03:55,804 - pipeline.trainer - INFO - Epoch 6/20, Train Loss: 0.6963, Val Loss: 0.6869
+ 2025-01-06 19:03:56,969 - pipeline.trainer - INFO - Epoch 7/20, Train Loss: 0.6593, Val Loss: 0.6919
+ 2025-01-06 19:03:58,135 - pipeline.trainer - INFO - Epoch 8/20, Train Loss: 0.6531, Val Loss: 0.6691
+ 2025-01-06 19:03:59,274 - pipeline.trainer - INFO - Epoch 9/20, Train Loss: 0.6444, Val Loss: 0.6625
+ 2025-01-06 19:04:00,445 - pipeline.trainer - INFO - Epoch 10/20, Train Loss: 0.6473, Val Loss: 0.6644
+ 2025-01-06 19:04:01,577 - pipeline.trainer - INFO - Epoch 11/20, Train Loss: 0.6338, Val Loss: 0.6757
+ 2025-01-06 19:04:02,737 - pipeline.trainer - INFO - Epoch 12/20, Train Loss: 0.6356, Val Loss: 0.6671
+ 2025-01-06 19:04:03,890 - pipeline.trainer - INFO - Epoch 13/20, Train Loss: 0.6390, Val Loss: 0.6591
+ 2025-01-06 19:04:05,140 - pipeline.trainer - INFO - Epoch 14/20, Train Loss: 0.6335, Val Loss: 0.6530
+ 2025-01-06 19:04:06,665 - pipeline.trainer - INFO - Epoch 15/20, Train Loss: 0.6349, Val Loss: 0.6527
+ 2025-01-06 19:04:08,485 - pipeline.trainer - INFO - Epoch 16/20, Train Loss: 0.6383, Val Loss: 0.6734
+ 2025-01-06 19:04:09,881 - pipeline.trainer - INFO - Epoch 17/20, Train Loss: 0.6310, Val Loss: 0.6583
+ 2025-01-06 19:04:11,073 - pipeline.trainer - INFO - Epoch 18/20, Train Loss: 0.6340, Val Loss: 0.6496
+ 2025-01-06 19:04:12,325 - pipeline.trainer - INFO - Epoch 19/20, Train Loss: 0.6363, Val Loss: 0.6392
+ 2025-01-06 19:04:13,552 - pipeline.trainer - INFO - Epoch 20/20, Train Loss: 0.6296, Val Loss: 0.6535
+ 2025-01-06 19:04:13,552 - pipeline.trainer - INFO - Training completed!
+ 2025-01-06 19:04:13,561 - pipeline.trainer - INFO - Initialize the model...
+ 2025-01-06 19:04:13,562 - pipeline.trainer - INFO - Configure the training parameters...
+ 2025-01-06 19:04:13,562 - pipeline.trainer - INFO - Start training...
+ 2025-01-06 19:04:18,489 - pipeline.trainer - INFO - Epoch 1/5, Train Loss: 0.0816, Val Loss: 0.0026
+ 2025-01-06 19:04:23,306 - pipeline.trainer - INFO - Epoch 2/5, Train Loss: 0.0082, Val Loss: 0.0021
+ 2025-01-06 19:04:28,844 - pipeline.trainer - INFO - Epoch 3/5, Train Loss: 0.0046, Val Loss: 0.0012
+ 2025-01-06 19:04:33,885 - pipeline.trainer - INFO - Epoch 4/5, Train Loss: 0.0031, Val Loss: 0.0006
+ 2025-01-06 19:04:38,925 - pipeline.trainer - INFO - Epoch 5/5, Train Loss: 0.0024, Val Loss: 0.0007
+ 2025-01-06 19:04:38,925 - pipeline.trainer - INFO - Training completed!
+ 2025-01-06 19:04:38,939 - root - INFO - Model saved at C:\Users\LENOVO\PycharmProjects\NeuroOrion_Time_Series_Anomaly_Detection\models\lstm_model_small.pth
+ 2025-01-06 19:04:38,943 - root - INFO - Model saved at C:\Users\LENOVO\PycharmProjects\NeuroOrion_Time_Series_Anomaly_Detection\models\vae_model_small.pth
+ 2025-01-06 19:04:38,951 - root - INFO - Model saved at C:\Users\LENOVO\PycharmProjects\NeuroOrion_Time_Series_Anomaly_Detection\models\transformer_model_small.pth
src/pipeline/__init__.py ADDED
@@ -0,0 +1,7 @@
+ from .dataloader import NYCDataLoader
+ from .preprocesser import Preprocessor
+ from .model import (VanillaLSTM,
+                     Transformer,
+                     VAE)
+ from .trainer import Trainer
+ from .utils import save_model
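With these re-exports, callers import from the package root instead of individual modules — `src/app.py` does exactly this with `from pipeline import Transformer`. For example:

```python
from pipeline import NYCDataLoader, Transformer, Trainer
```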
src/pipeline/dataloader.py ADDED
@@ -0,0 +1,65 @@
+ # Class for loading data from the dataset
+ import os
+ import logging
+ import pandas as pd
+ import torch
+ import numpy as np
+ from torch.utils.data import TensorDataset, DataLoader
+
+ from path_config import (
+     RAW_DATA_PATH,
+     TRAIN_FEATURES_PATH,
+     TRAIN_LABELS_PATH,
+     VAL_FEATURES_PATH,
+     VAL_LABELS_PATH,
+     TEST_FEATURES_PATH,
+     TEST_LABELS_PATH,
+ )
+
+
+ class NYCDataLoader:
+
+     def __init__(self, batch_size):
+         self.batch_size = batch_size
+         self.train_features = None
+         self.train_labels = None
+         self.val_features = None
+         self.val_labels = None
+         self.test_features = None
+         self.test_labels = None
+         self.logger = logging.getLogger(__name__)
+
+     def create_tensor(self):
+         """
+         Load the preprocessed data and convert them to tensors
+         """
+         try:
+             self.train_features = torch.tensor(np.load(TRAIN_FEATURES_PATH)).float()
+             self.train_labels = torch.tensor(np.load(TRAIN_LABELS_PATH)).float()
+             self.val_features = torch.tensor(np.load(VAL_FEATURES_PATH)).float()
+             self.val_labels = torch.tensor(np.load(VAL_LABELS_PATH)).float()
+             self.test_features = torch.tensor(np.load(TEST_FEATURES_PATH)).float()
+             self.test_labels = torch.tensor(np.load(TEST_LABELS_PATH)).float()
+         except FileNotFoundError:
+             print("Preprocessed data not found. Please run the preprocessing script first.")
+
+     def load_data(self):
+         """
+         Create data loaders for training, validation, and testing
+         """
+         self.create_tensor()
+
+         train_loader = DataLoader(self.train_features, batch_size=self.batch_size, shuffle=True)
+         val_loader = DataLoader(self.val_features, batch_size=self.batch_size, shuffle=True)
+         # Keep the test set in order so predictions stay aligned with get_true_anomalies()
+         test_loader = DataLoader(self.test_features, batch_size=self.batch_size, shuffle=False)
+
+         print("Data loaded successfully.")
+
+         return train_loader, val_loader, test_loader
+
+     def get_true_anomalies(self):
+         """
+         Get the true anomalies from the test data
+         """
+         return self.test_labels
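Typical usage (a sketch: the preprocessed `.npy` files must already exist, i.e. the preprocessing step has been run):

```python
loader = NYCDataLoader(batch_size=32)
train_loader, val_loader, test_loader = loader.load_data()
labels = loader.get_true_anomalies()  # tensor of anomaly flags for the test split
```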
src/pipeline/model.py ADDED
@@ -0,0 +1,132 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
+ # Network architectures for the anomaly detection models
+ import torch
+ import torch.nn as nn
+ 
+ 
+ class VanillaLSTM(nn.Module):
+     def __init__(
+         self, input_dim=1, hidden_dim=64, output_dim=1, num_layers=2, dropout=0.2
+     ):
+         super(VanillaLSTM, self).__init__()
+         self.hidden_dim = hidden_dim
+         self.num_layers = num_layers
+ 
+         self.lstm = nn.LSTM(
+             input_size=input_dim,
+             hidden_size=hidden_dim,
+             num_layers=num_layers,
+             batch_first=True,
+             dropout=dropout,
+         )
+         self.fc = nn.Linear(in_features=hidden_dim, out_features=output_dim)
+ 
+     def forward(self, x):
+         # Zero initial hidden and cell states, created on the same device as the input
+         h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_dim, device=x.device)
+         c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_dim, device=x.device)
+ 
+         out, (hn, cn) = self.lstm(x, (h0, c0))
+         # Forecast from the hidden state of the final timestep only
+         out = self.fc(out[:, -1, :])
+ 
+         return out
+ 
+ 
+ class VAE(nn.Module):
+ 
+     def __init__(self, seq_len=48, n_features=1, hidden_dim=64, latent_dim=16, dropout=0.3):
+         super(VAE, self).__init__()
+         self.seq_len = seq_len
+         self.hidden_dim = hidden_dim
+ 
+         # Encoder
+         self.enc_lstm = nn.LSTM(
+             input_size=n_features,
+             hidden_size=hidden_dim,
+             batch_first=True
+         )
+         self.enc_dropout = nn.Dropout(p=dropout)
+         self.fc_mu = nn.Linear(hidden_dim, latent_dim)
+         self.fc_var = nn.Linear(hidden_dim, latent_dim)
+ 
+         # Decoder
+         self.fc_upsample = nn.Linear(latent_dim, seq_len * hidden_dim)
+         self.dec_dropout = nn.Dropout(p=dropout)
+         self.dec_lstm = nn.LSTM(
+             input_size=hidden_dim,
+             hidden_size=hidden_dim,
+             batch_first=True
+         )
+         self.fc_out = nn.Linear(hidden_dim, n_features)
+ 
+     def reparameterize(self, mu, log_var):
+         # Sample z = mu + sigma * eps with eps ~ N(0, I)
+         std = torch.exp(0.5 * log_var)
+         eps = torch.randn_like(std)
+         return mu + eps * std
+ 
+     def forward(self, x):
+         # Encode
+         _, (h_enc, c_enc) = self.enc_lstm(x)
+         h_enc = h_enc.squeeze(0)  # shape: (batch_size, hidden_dim)
+         h_enc = self.enc_dropout(h_enc)
+         mu, log_var = self.fc_mu(h_enc), self.fc_var(h_enc)
+ 
+         # Reparameterize at the latent space
+         z = self.reparameterize(mu, log_var)
+ 
+         # Decode
+         z = self.fc_upsample(z)
+         z = z.view(-1, self.seq_len, self.hidden_dim)
+         decoded, _ = self.dec_lstm(z)
+         dec_out = self.dec_dropout(decoded)
+         out = self.fc_out(dec_out)
+ 
+         return out, mu, log_var
+ 
+ 
+ class Transformer(nn.Module):
+ 
+     def __init__(self, input_dim=1, model_dim=64, num_layers=2, num_heads=4, dropout=0.2):
+         super(Transformer, self).__init__()
+         self.model_dim = model_dim
+         self.num_layers = num_layers
+ 
+         self.embedding = nn.Linear(input_dim, model_dim)
+ 
+         encoder_layer = nn.TransformerEncoderLayer(
+             d_model=model_dim,
+             nhead=num_heads,
+             dropout=dropout,
+             dim_feedforward=2 * model_dim,  # 128
+             batch_first=True
+         )
+         encoder_norm = nn.LayerNorm(model_dim)
+ 
+         self.transformer_encoder = nn.TransformerEncoder(
+             encoder_layer,
+             num_layers=num_layers,
+             norm=encoder_norm
+         )
+ 
+         decoder_layer = nn.TransformerDecoderLayer(
+             d_model=model_dim,
+             nhead=num_heads,
+             dropout=dropout,
+             dim_feedforward=2 * model_dim,  # 128
+             batch_first=True
+         )
+         decoder_norm = nn.LayerNorm(model_dim)
+ 
+         self.transformer_decoder = nn.TransformerDecoder(
+             decoder_layer,
+             num_layers=num_layers,
+             norm=decoder_norm
+         )
+         self.output = nn.Linear(model_dim, input_dim)
+ 
+     def forward(self, x):
+         # Reconstruct the input: the embedded sequence serves as both the
+         # decoder target and, via the encoder, its memory
+         embed_x = self.embedding(x)
+         enc_out = self.transformer_encoder(embed_x)
+         dec_out = self.transformer_decoder(embed_x, enc_out)
+         out = self.output(dec_out)
+         return out
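As a quick sanity check on the three architectures, a hedged sketch that pushes a dummy batch of 32 windows of length 24 with one feature through each model (the import path is an assumption):

import torch
from src.pipeline import VanillaLSTM, VAE, Transformer

x = torch.randn(32, 24, 1)  # (batch, seq_len, n_features)

lstm = VanillaLSTM()
print(lstm(x[:, :-1, :]).shape)   # torch.Size([32, 1]): next-step forecast

vae = VAE(seq_len=24)
recon, mu, log_var = vae(x)
print(recon.shape, mu.shape)      # torch.Size([32, 24, 1]) torch.Size([32, 16])

transformer = Transformer()
print(transformer(x).shape)       # torch.Size([32, 24, 1]): reconstruction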
src/pipeline/preprocesser.py ADDED
@@ -0,0 +1,223 @@
+ # Class for preprocessing the data before the training phase
+ import os
+ import logging
+ 
+ import numpy as np
+ import pandas as pd
+ from sklearn.preprocessing import StandardScaler
+ from adtk.data import validate_series
+ 
+ from path_config import DATA_DIR
+ 
+ 
+ class Preprocessor:
+ 
+     def __init__(self):
+         self.scaler = StandardScaler()
+         self.raw_data = None
+         self.anomalies_events = None
+         self.logger = logging.getLogger(__name__)
+         self.window_size = None
+ 
+     def preprocess_data(self, file_path, val_split="2014-10-01", test_split="2014-10-16", window_size=48):
+         """
+         Preprocess the raw data
+ 
+         :param file_path: Path to the CSV file
+         :param val_split: Date at which the validation set starts
+         :param test_split: Date at which the test set starts
+         :param window_size: The size of the sliding window, default is 48
+         """
+         # Load the raw data
+         self.logger.info("Loading raw data...")
+         self.load_raw_data(file_path)
+ 
+         # Load the known anomalous event windows
+         self.logger.info("Loading anomaly events...")
+         self.load_anomalies_events()
+ 
+         # Label the anomalies in the raw data
+         self.logger.info("Labeling anomalies...")
+         self._label_anomalies()
+ 
+         # Split the data chronologically
+         self.logger.info("Splitting the data into training, validation and testing sets...")
+         train_data, val_data, test_data = self._chronological_split(val_split=val_split, test_split=test_split)
+ 
+         # Split the data into features and target
+         self.logger.info("Splitting the data into features and target...")
+         X_train, y_train = self._split_features_target(train_data)
+         X_val, y_val = self._split_features_target(val_data)
+         X_test, y_test = self._split_features_target(test_data)
+ 
+         # Scale the data
+         self.logger.info("Scaling the data...")
+         train_scaled, val_scaled, test_scaled = self._scale_data(X_train, X_val, X_test)
+ 
+         # Create a sliding window of data
+         self.logger.info(f"Creating sliding windows of length {window_size} from the data...")
+         train_sequences = self._create_sliding_window(train_scaled, window_size=window_size)
+         val_sequences = self._create_sliding_window(val_scaled, window_size=window_size)
+         test_sequences = self._create_sliding_window(test_scaled, window_size=window_size)
+ 
+         self.window_size = window_size
+ 
+         # Save the preprocessed data. Note: feature arrays hold one entry per
+         # window while label arrays keep one entry per timestamp, so downstream
+         # evaluation should align them (e.g. take each window's last label)
+         self.logger.info("Saving the preprocessed data...")
+         self.save_preprocessed_data(train_sequences, "train_features.npy")
+         self.save_preprocessed_data(val_sequences, "val_features.npy")
+         self.save_preprocessed_data(test_sequences, "test_features.npy")
+         self.save_preprocessed_data(y_train.values, "train_labels.npy")
+         self.save_preprocessed_data(y_val.values, "val_labels.npy")
+         self.save_preprocessed_data(y_test.values, "test_labels.npy")
+ 
+         print("Preprocessing completed!")
+ 
+     def load_raw_data(self, file_path):
+         """
+         Load raw data from a CSV file
+ 
+         :param file_path: Path to the CSV file
+         """
+         try:
+             df = pd.read_csv(
+                 file_path,
+                 usecols=["timestamp", "value"],
+                 index_col="timestamp",
+                 parse_dates=True,
+             )
+             df.sort_index(inplace=True)
+ 
+             # Rename the columns
+             df.rename(columns={"value": "Traffic"}, inplace=True)
+             df.index.rename("Timestamp", inplace=True)
+ 
+             # Validate the time series
+             self.raw_data = validate_series(df)
+ 
+         except FileNotFoundError:
+             self.logger.error(f"File path does not exist: {file_path}")
+             raise
+ 
+     def load_anomalies_events(self):
+         """
+         Load the known anomalous event windows (hard-coded city events)
+         """
+         if self.raw_data is None:
+             self.logger.warning("Raw data is not loaded")
+             return
+ 
+         events = [
+             ('2014-07-04 00:00:00', '2014-07-06 23:59:59'),  # Independence Day celebration
+             ('2014-09-01 00:00:00', '2014-09-01 23:59:59'),  # Labor Day
+             ('2014-11-02 00:00:00', '2014-11-02 11:59:59'),  # NYC Marathon 2014
+             ('2014-11-27 00:00:00', '2014-11-27 23:59:59'),  # Thanksgiving Day
+             ('2014-12-25 00:00:00', '2014-12-26 23:59:59'),  # Christmas holiday
+             ('2015-01-01 00:00:00', '2015-01-01 23:59:59'),  # New Year
+             ('2015-01-26 12:00:00', '2015-01-28 11:59:59')   # Snowstorm
+         ]
+ 
+         # Store the events
+         self.anomalies_events = events
+ 
+     def _label_anomalies(self):
+         """
+         Label the anomalies in the raw data
+         """
+         if self.raw_data is None:
+             self.logger.warning("Raw data is not loaded")
+             return
+ 
+         if self.anomalies_events is None:
+             self.logger.warning("Anomaly events are not loaded")
+             return
+ 
+         # Label anomalous timestamps as 1, normal ones as 0
+         self.raw_data["Anomaly"] = 0
+         for start, end in self.anomalies_events:
+             self.raw_data.loc[start:end, "Anomaly"] = 1
+ 
+     def _chronological_split(self, val_split="2014-10-01", test_split="2014-10-16"):
+         """
+         Split the data chronologically into train, validation, and test sets
+ 
+         :param val_split: Validation split date
+         :param test_split: Test split date
+         """
+         if self.raw_data is None:
+             self.logger.warning("Raw data is not loaded")
+             return
+ 
+         # Split the data
+         train_data = self.raw_data.loc[self.raw_data.index < val_split]
+         val_data = self.raw_data.loc[
+             (self.raw_data.index >= val_split) & (self.raw_data.index < test_split)
+         ]
+         test_data = self.raw_data.loc[self.raw_data.index >= test_split]
+ 
+         return train_data, val_data, test_data
+ 
+     def _split_features_target(self, data, target_col="Anomaly"):
+         """
+         Split the data into features and target
+ 
+         :param data: DataFrame containing the data
+         :param target_col: Column to predict
+         """
+         X = data.drop(columns=[target_col])
+         y = data[target_col]
+ 
+         return X, y
+ 
+     def _scale_data(self, train_data, val_data, test_data):
+         """
+         Scale the data using StandardScaler
+ 
+         :param train_data: Training data
+         :param val_data: Validation data
+         :param test_data: Test data
+         """
+         # Fit on the training split only to avoid leaking future statistics
+         train_scaled = self.scaler.fit_transform(train_data)
+         val_scaled = self.scaler.transform(val_data)
+         test_scaled = self.scaler.transform(test_data)
+ 
+         return train_scaled, val_scaled, test_scaled
+ 
+     def _create_sliding_window(self, data, window_size=48, step_size=1):
+         """
+         Create a sliding window of data
+ 
+         :param data: Scaled data
+         :param window_size: Size of the window
+         :param step_size: Step size for the window
+         """
+         sequences = []
+ 
+         for i in range(0, len(data) - window_size + 1, step_size):
+             sequences.append(data[i : i + window_size])
+ 
+         return np.array(sequences)
+ 
+     def save_preprocessed_data(self, data, file_path):
+         """
+         Save preprocessed data to a .npy file
+ 
+         :param data: Preprocessed data
+         :param file_path: Name of the .npy file to save
+         """
+         dir_path = os.path.join(DATA_DIR, "preprocessed_data")
+         os.makedirs(dir_path, exist_ok=True)
+ 
+         file_path = os.path.join(dir_path, file_path)
+ 
+         np.save(file_path, data)
+         self.logger.info(f"{file_path} has been saved successfully!")
+ 
+     def get_seq_length(self):
+         """
+         Get the length of the sliding window sequences
+         """
+         return self.window_size
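As a worked example of the windowing arithmetic in _create_sliding_window (a standalone sketch, not pipeline code): 480 scaled readings with a window of 48 and step 1 yield 480 - 48 + 1 = 433 overlapping sequences, each of shape (48, 1):

import numpy as np

data = np.arange(480).reshape(-1, 1)  # 480 readings, one feature
window_size, step_size = 48, 1

sequences = np.array([
    data[i : i + window_size]
    for i in range(0, len(data) - window_size + 1, step_size)
])
print(sequences.shape)  # (433, 48, 1)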
src/pipeline/trainer.py ADDED
@@ -0,0 +1,176 @@
+ # Class for model training and evaluation
+ import copy
+ import logging
+ 
+ import torch
+ import torch.nn as nn
+ from torch.optim import Adam
+ 
+ 
+ class Trainer:
+ 
+     def __init__(self):
+         self.logger = logging.getLogger(__name__)
+         self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+         self.batch_size = None
+         self.model = None
+         self.model_type = None
+         self.optimizer = None
+         self.criterion = None
+         self.train_loader = None
+         self.val_loader = None
+         self.test_loader = None
+         self.n_epochs = None
+         self.train_history = {'train_loss': [], 'val_loss': []}
+         self.best_model = None
+         self.best_val_loss = float('inf')
+ 
+     def init_model(self, model, model_type):
+         """
+         Initialize the model and record its type
+ 
+         :param model: The model architecture
+         :param model_type: The type of the model ("lstm", "vae" or "transformer")
+         """
+         self.logger.info("Initializing the model...")
+ 
+         self.model = model.to(self.device)
+         if model_type not in ["lstm", "vae", "transformer"]:
+             raise ValueError("Model type not supported")
+         self.model_type = model_type
+ 
+     def config_train(self, batch_size=32, n_epochs=20, lr=0.001):
+         """
+         Configure the training parameters
+ 
+         :param batch_size: The batch size, default is 32
+         :param n_epochs: The number of epochs, default is 20
+         :param lr: The learning rate, default is 0.001
+         """
+         self.logger.info("Configuring the training parameters...")
+ 
+         self.batch_size = batch_size
+         self.n_epochs = n_epochs
+ 
+         self.optimizer = Adam(self.model.parameters(), lr=lr)
+         self.criterion = nn.MSELoss()
+ 
+     def train(self, train_loader, val_loader):
+         """
+         Train the model
+ 
+         :param train_loader: The training data loader
+         :param val_loader: The validation data loader
+         """
+         print("Training the model...")
+         self.logger.info("Start training...")
+ 
+         self.train_loader = train_loader
+         self.val_loader = val_loader
+ 
+         # Reset the trackers so a reused Trainer does not mix models' results
+         self.train_history = {'train_loss': [], 'val_loss': []}
+         self.best_val_loss = float('inf')
+         self.best_model = None
+ 
+         for epoch in range(self.n_epochs):
+             train_loss = self._train_epoch()
+             val_loss = self._val_epoch()
+ 
+             self.train_history['train_loss'].append(train_loss)
+             self.train_history['val_loss'].append(val_loss)
+ 
+             self.logger.info(f"Epoch {epoch + 1}/{self.n_epochs}, Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}")
+ 
+         self.logger.info("Training completed!")
+         print("Training completed!")
+ 
+         return self.best_model, self.train_history
+ 
+     def _train_epoch(self):
+         """
+         Train the model for one epoch
+         """
+         self.model.train()
+         train_loss = 0
+ 
+         for seq in self.train_loader:
+ 
+             self.optimizer.zero_grad()
+ 
+             if self.model_type == "lstm":
+                 X_train = seq[:, :-1, :]  # All timesteps except the last one
+                 y_train = seq[:, -1, :]   # Final timestep to forecast
+ 
+                 X_train = X_train.to(self.device)
+                 y_train = y_train.to(self.device)
+ 
+                 output = self.model(X_train)
+                 loss = self.criterion(output, y_train)
+ 
+             elif self.model_type == "vae":
+                 X = seq.to(self.device)
+                 recon_X, mu, logvar = self.model(X)
+                 # ELBO-style loss: reconstruction error plus a weighted KL divergence term
+                 recon_loss = self.criterion(recon_X, X)
+                 kl_div = -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp()) / X.size(0)
+                 loss = recon_loss + 0.2 * kl_div
+ 
+             elif self.model_type == "transformer":
+                 X = seq.to(self.device)
+                 recon_X = self.model(X)
+                 loss = self.criterion(recon_X, X)
+ 
+             else:
+                 raise ValueError("Model type not supported")
+ 
+             loss.backward()
+             self.optimizer.step()
+ 
+             train_loss += loss.item()
+ 
+         return train_loss / len(self.train_loader)
+ 
+     def _val_epoch(self):
+         """
+         Validate the model for one epoch
+         """
+         self.model.eval()
+         val_loss = 0
+ 
+         with torch.no_grad():
+             for seq in self.val_loader:
+ 
+                 if self.model_type == "lstm":
+                     X_val = seq[:, :-1, :]
+                     y_val = seq[:, -1, :]
+ 
+                     X_val = X_val.to(self.device)
+                     y_val = y_val.to(self.device)
+ 
+                     output = self.model(X_val)
+                     loss = self.criterion(output, y_val)
+ 
+                 elif self.model_type == "vae":
+                     X = seq.to(self.device)
+                     recon_X, mu, logvar = self.model(X)
+                     recon_loss = self.criterion(recon_X, X)
+                     kl_div = -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp()) / X.size(0)
+                     loss = recon_loss + 0.2 * kl_div
+ 
+                 elif self.model_type == "transformer":
+                     X_val = seq.to(self.device)
+                     recon_X = self.model(X_val)
+                     loss = self.criterion(recon_X, X_val)
+ 
+                 else:
+                     raise ValueError("Model type not supported")
+ 
+                 val_loss += loss.item()
+ 
+         val_loss /= len(self.val_loader)
+ 
+         # Keep a copy of the weights whenever the validation loss improves
+         if val_loss < self.best_val_loss:
+             self.best_model = copy.deepcopy(self.model)
+             self.best_val_loss = val_loss
+ 
+         return val_loss
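The Trainer only fits the models; scoring comes later. As a hedged sketch of how a trained reconstruction model (VAE or Transformer) could flag anomalies by per-window reconstruction error, with the 95th-percentile threshold being an illustrative assumption rather than part of this commit:

import torch

def reconstruction_errors(model, data_loader, device):
    """Mean squared reconstruction error per window."""
    model.eval()
    errors = []
    with torch.no_grad():
        for seq in data_loader:
            seq = seq.to(device)
            recon = model(seq)
            if isinstance(recon, tuple):  # the VAE returns (recon, mu, log_var)
                recon = recon[0]
            errors.append(((recon - seq) ** 2).mean(dim=(1, 2)))
    return torch.cat(errors)

# errors = reconstruction_errors(best_model, test_loader, trainer.device)
# flags = errors > torch.quantile(errors, 0.95)  # flag the top 5% as anomalies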
src/pipeline/utils.py ADDED
@@ -0,0 +1,16 @@
+ import os
+ import logging
+ 
+ import torch
+ 
+ from path_config import MODEL_DIR
+ 
+ 
+ def save_model(model, model_name):
+     """
+     Save the trained model's state dict
+ 
+     :param model: The trained model
+     :param model_name: File name for the saved weights
+     """
+     os.makedirs(MODEL_DIR, exist_ok=True)
+ 
+     model_path = os.path.join(MODEL_DIR, model_name)
+     torch.save(model.state_dict(), model_path)
+     logging.info(f"Model saved at {model_path}")
+     print("Saved successfully!")
src/train.py ADDED
@@ -0,0 +1,44 @@
+ # Imports assume the script is run with src/ on the Python path
+ from config.config import setup_logging
+ from pipeline import Preprocessor, NYCDataLoader, Trainer, VanillaLSTM, Transformer, VAE, save_model
+ from path_config import RAW_DATA_PATH
+ 
+ 
+ def train():
+ 
+     seq_length = 24
+ 
+     setup_logging()
+ 
+     # Preprocess the data
+     preprocessor = Preprocessor()
+     preprocessor.preprocess_data(file_path=RAW_DATA_PATH, window_size=seq_length)
+ 
+     # Load the preprocessed data
+     data_loader = NYCDataLoader(batch_size=32)
+     train_loader, val_loader, test_loader = data_loader.load_data()
+ 
+     # Initialize the Trainer
+     trainer = Trainer()
+ 
+     # Train the Vanilla LSTM model
+     trainer.init_model(model=VanillaLSTM(), model_type="lstm")
+     trainer.config_train(batch_size=32, n_epochs=20, lr=0.001)
+     lstm_model, lstm_history = trainer.train(train_loader=train_loader, val_loader=val_loader)
+ 
+     # Train the VAE model
+     trainer.init_model(model=VAE(seq_len=seq_length), model_type="vae")
+     trainer.config_train(batch_size=32, n_epochs=20, lr=0.001)
+     vae_model, vae_history = trainer.train(train_loader=train_loader, val_loader=val_loader)
+ 
+     # Train the Transformer model
+     trainer.init_model(model=Transformer(), model_type="transformer")
+     trainer.config_train(batch_size=32, n_epochs=5, lr=0.001)
+     transformer_model, transformer_history = trainer.train(train_loader=train_loader, val_loader=val_loader)
+ 
+     # Save the best models
+     save_model(lstm_model, "lstm_model_small.pth")
+     save_model(vae_model, "vae_model_small.pth")
+     save_model(transformer_model, "transformer_model_small.pth")
+ 
+ 
+ if __name__ == '__main__':
+     train()
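Assuming config.config, path_config and pipeline all live under src/, running "python src/train.py" from the repository root puts src/ on sys.path, so the imports above resolve and the full preprocess-train-save pipeline runs end to end.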