Upload 12 files

- .streamlit/config.toml +6 -0
- src/app.py +231 -61
- src/config/config.py +10 -0
- src/inference.py +25 -0
- src/logs/train.log +300 -0
- src/pipeline/__init__.py +7 -0
- src/pipeline/dataloader.py +65 -0
- src/pipeline/model.py +132 -0
- src/pipeline/preprocesser.py +223 -0
- src/pipeline/trainer.py +176 -0
- src/pipeline/utils.py +16 -0
- src/train.py +44 -0
.streamlit/config.toml
ADDED
@@ -0,0 +1,6 @@
+[theme]
+primaryColor = "#FFCC00"  # Taxi yellow
+backgroundColor = "#F0F0F0"  # Light gray resembling city streets
+secondaryBackgroundColor = "#FFFFFF"  # White for clean sidebar contrast
+textColor = "#333333"  # Dark gray for readability
+font = "sans serif"  # Modern, clean font style
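Streamlit picks this file up automatically from the .streamlit/ directory next to the app, so no code change is needed for the theme to apply. As a quick sanity check (a sketch, not part of the upload), the values can be read back at runtime with st.get_option:

import streamlit as st

# Assumes the app is launched from the repo root so .streamlit/config.toml is found.
st.write("Primary color:", st.get_option("theme.primaryColor"))        # "#FFCC00"
st.write("Background color:", st.get_option("theme.backgroundColor"))  # "#F0F0F0"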
src/app.py
CHANGED
@@ -1,17 +1,36 @@
 import streamlit as st
 import pandas as pd
-import numpy as np
 import plotly.express as px
 import plotly.graph_objs as go
+import numpy as np
 from sklearn.preprocessing import StandardScaler
-from
-from datetime import datetime
+from dataclasses import dataclass
+from datetime import datetime
+import torch
+import torch.nn as nn
+import os
+from torch.utils.data import DataLoader, TensorDataset
+from path_config import MODEL_DIR
+from pipeline import Transformer


+@dataclass()
 class NYCTaxiAnomalyDetector:
     def __init__(self, data):
         self.data = data.copy()
         self.scaler = StandardScaler()
+        self.model = None
+        self.TRANSFORMER_S_MODEL_PATH = os.path.join(
+            MODEL_DIR, "transformer_model_small.pth"
+        )
+
+    def create_sequences(self, data, seq_length=24):
+        """Create sequences for the transformer model"""
+        sequences = []
+        values = data.reshape(-1, 1)
+        for i in range(len(values) - seq_length + 1):
+            sequences.append(values[i : i + seq_length])
+        return np.array(sequences)

     def filter_by_date_range(self, start_date, end_date):
         """
@@ -41,15 +60,16 @@ class NYCTaxiAnomalyDetector:
         :return: Scaled data and original index
         """
         # Ensure the column is numeric
-        data[column] = pd.to_numeric(data[column], errors="coerce")
+        data.loc[:, column] = pd.to_numeric(data[column], errors="coerce")

         # Remove NaN values
         clean_data = data[column].dropna()

         # Scale the data
         scaled_data = self.scaler.fit_transform(clean_data.values.reshape(-1, 1))
+        sequences = self.create_sequences(scaled_data)

-        return
+        return sequences, clean_data.index[23:]

     def detect_anomalies(self, data, column, contamination=0.05):
         """
@@ -60,77 +80,176 @@ class NYCTaxiAnomalyDetector:
         :param contamination: Expected proportion of outliers
         :return: DataFrame with anomaly detection results
         """
+        if self.model is None:
+            self.model = Transformer()
+            self.model.load_state_dict(
+                torch.load(self.TRANSFORMER_S_MODEL_PATH, weights_only=True)
+            )
+            self.model.eval()
+
         # Preprocess data
-
+        sequences, original_index = self.preprocess_data(data, column)
+
+        # Create DataLoader
+        dataset = TensorDataset(torch.FloatTensor(sequences))
+        test_loader = DataLoader(dataset, batch_size=32, shuffle=False)

-        #
-
-
+        # Calculate threshold percentile from contamination
+        threshold_percentile = (1 - contamination) * 100
+
+        # Detect anomalies
+        reconstruction_errors, predictions, anomalies, optimal_threshold = (
+            self.detect_anomalies_batch(
+                self.model, test_loader, threshold_percentile=threshold_percentile
+            )
+        )

         # Create results DataFrame
         anomaly_results = pd.DataFrame(
             {
-                "date": original_index,
-
-                "is_anomaly":
+                "date": data.loc[original_index, "date"],
+                column: data.loc[original_index, column],
+                "is_anomaly": anomalies,
+                "reconstruction_error": reconstruction_errors,
+                "prediction": predictions,
             }
         )

         return anomaly_results

-
+    def detect_anomalies_batch(self, model, test_loader, threshold_percentile=99.7):
+        """Detect anomalies in batches"""
+        reconstruction_errors = []
+        predictions = []
+
+        with torch.no_grad():
+            for seq_true in test_loader:
+                x = seq_true[0]  # Remove extra dimension from TensorDataset
+                pred = model(x)
+                # Calculate reconstruction error for each sequence
+                errors = torch.mean(
+                    torch.abs(pred - x), dim=(1, 2)
+                )  # Mean over sequence length and features
+                reconstruction_errors.extend(errors.cpu().numpy())
+                predictions.extend(
+                    pred[:, -1, 0].cpu().numpy()
+                )  # Take last timestep prediction
+
+        reconstruction_errors = np.array(reconstruction_errors)
+        predictions = np.array(predictions)
+        optimal_threshold = np.percentile(reconstruction_errors, threshold_percentile)
+        anomalies = (reconstruction_errors > optimal_threshold).astype(int)
+
+        return reconstruction_errors, predictions, anomalies, optimal_threshold
+
+
+@dataclass()
 class AIContextGenerator:
+    predefined_anomalies = {
+        datetime(2014, 11, 2).date(): [
+            {
+                "type": "NYC Marathon",
+                "description": "No significant anomalies detected with current settings. No significant anomalies detected with current settings. No significant anomalies detected with current settings.",
+                "reference": None,
+            }
+        ],
+        datetime(2014, 11, 27).date(): [
+            {
+                "type": "Thanksgiving Day",
+                "description": "Thanksgiving Day",
+                "reference": None,
+            }
+        ],
+        datetime(2014, 12, 25).date(): [
+            {
+                "type": "Christmas",
+                "description": "Christmas",
+                "reference": {
+                    "text": "NYC Marathon 2014",
+                    "url": "https://en.wikipedia.org/wiki/2014_New_York_City_Marathon",
+                },
+            }
+        ],
+        datetime(2015, 1, 1).date(): [
+            {
+                "type": "Event",
+                "description": "New Year's Day",
+                "reference": {
+                    "text": "NYC Marathon 2014",
+                    "url": "https://en.wikipedia.org/wiki/2014_New_York_City_Marathon",
+                },
+            }
+        ],
+        datetime(2015, 1, 26).date(): [
+            {
+                "type": "Event",
+                "description": "Snowstorm",
+                "reference": {
+                    "text": "NYC Marathon 2014",
+                    "url": "https://en.wikipedia.org/wiki/2014_New_York_City_Marathon",
+                },
+            }
+        ],
+        datetime(2015, 1, 27).date(): [
+            {
+                "type": "Event",
+                "description": "Snowstorm",
+                "reference": {
+                    "text": "NYC Marathon 2014",
+                    "url": "https://en.wikipedia.org/wiki/2014_New_York_City_Marathon",
+                },
+            }
+        ],
+        datetime(2014, 7, 1).date(): [
+            {
+                "type": "Event",
+                "description": "Testing",
+                "reference": {
+                    "text": "NYC Marathon 2014",
+                    "url": "https://en.wikipedia.org/wiki/2014_New_York_City_Marathon",
+                },
+            }
+        ],
+    }
+
     def generate_context(self, anomaly_date):
         """
-        Generate potential context for the anomaly
+        Generate potential context for the anomaly if predefined

         :param anomaly_date: Date of the anomaly
-        :return: List of contextual insights
+        :return: List of contextual insights if available, else None
         """
-
-
-
-
-                "description": f"Weather conditions on {anomaly_date.date()}",
-                "severity": "High",
-            },
-            {
-                "type": "Event",
-                "description": f"City events around {anomaly_date.date()}",
-                "severity": "Medium",
-            },
-            {
-                "type": "Economic",
-                "description": f"Economic factors on {anomaly_date.date()}",
-                "severity": "Low",
-            },
-        ]
-        return contexts
+        if isinstance(anomaly_date, pd.Timestamp):
+            anomaly_date = anomaly_date.date()
+
+        return self.predefined_anomalies.get(anomaly_date, None)


-def load_nyc_taxi_data():
+def load_nyc_taxi_data(file_path="data/nyc_taxi_traffic_data.csv"):
     """
-    Load and preprocess NYC Taxi dataset
+    Load and preprocess NYC Taxi dataset from a CSV file.

-    :return: DataFrame with
+    :return: DataFrame with taxi traffic data
     """
-    #
-
-    base_traffic = np.random.normal(5000, 500, len(dates))
+    # Load the CSV file
+    df = pd.read_csv(file_path)

-    #
-
-
-    base_traffic[300] = 12000  # Another spike
+    # Ensure timestamp column is datetime and rename columns for consistency
+    df["timestamp"] = pd.to_datetime(df["timestamp"])
+    df.rename(columns={"timestamp": "date", "value": "daily_traffic"}, inplace=True)

-
+    # Sort by date to ensure proper time-series ordering
+    df = df.sort_values(by="date").reset_index(drop=True)

     return df


 def main():
     st.set_page_config(
-        page_title="NYC Taxi Traffic Anomaly Detection",
+        page_title="NYC Taxi Traffic Anomaly Detection",
+        page_icon="🚕",
+        layout="wide",
+        initial_sidebar_state="expanded",
     )

     st.title("🚕 NYC Taxi Traffic Anomaly Detection")
@@ -180,8 +299,16 @@ def main():
         filtered_data, "daily_traffic", contamination=anomaly_threshold
     )

+    # Get anomaly points for visualization
+    anomaly_points = anomalies[anomalies["is_anomaly"] == 1]
+
+    # Filter true anomalies based on predefined anomalies
+    true_anomaly_points = anomaly_points[
+        anomaly_points["date"].dt.date.isin(AIContextGenerator.predefined_anomalies)
+    ]
+
     # Visualization
-    st.header("Daily Taxi Traffic Trend")
+    st.header("Daily Taxi Traffic Trend ✨")
     fig = px.line(
         filtered_data,
         x="date",
@@ -191,7 +318,6 @@ def main():
     )

     # Highlight Anomalies
-    anomaly_points = filtered_data[anomalies["is_anomaly"]]
     fig.add_trace(
         go.Scatter(
             x=anomaly_points["date"],
@@ -205,29 +331,73 @@ def main():
     st.plotly_chart(fig, use_container_width=True)

     # Anomaly Details
-    st.header("
+    st.header("Insights of Anomalies with Known Events 📈")

-
+    # Calculate metrics using the anomalies DataFrame
+    total_anomalies_detected = len(anomaly_points)
+    true_anomalies = len(true_anomaly_points)
+    false_anomalies = total_anomalies_detected - true_anomalies
+
+    st.sidebar.subheader("Summary")
+    st.sidebar.metric("Total Anomalies Detected:", total_anomalies_detected)
+    st.sidebar.metric("Anomalies with Known Events:", true_anomalies)
+    st.sidebar.metric("Unexplained Anomalies:", false_anomalies)
+
+    if not true_anomaly_points.empty:
         context_generator = AIContextGenerator()

-
-
+        # Group by date and calculate min/max traffic
+        grouped_anomalies = (
+            true_anomaly_points.groupby(true_anomaly_points["date"].dt.date)
+            .agg({"daily_traffic": ["min", "max"]})
+            .reset_index()
+        )
+
+        # Flatten the multi-level columns
+        grouped_anomalies.columns = ["date", "min_traffic", "max_traffic"]
+
+        for _, anomaly in grouped_anomalies.iterrows():

             col1, col2 = st.columns(2)

             with col1:
-                st.
+                st.subheader(f"Anomaly on {anomaly['date']}")
+                traffic_range = (
+                    f"{anomaly['min_traffic']:.0f}-{anomaly['max_traffic']:.0f}"
+                )
+                st.metric("Taxi Rides Range", traffic_range)

             with col2:
                 contexts = context_generator.generate_context(anomaly["date"])
-
-
-
-
-
-
-
-
+                if contexts:
+                    for context in contexts:
+                        st.subheader(f"Event: {context['type']}")
+
+                        reference_text = (
+                            context["reference"]["text"]
+                            if context["reference"]
+                            else "-"
+                        )
+                        reference_url = (
+                            context["reference"]["url"]
+                            if context["reference"] and context["reference"]["url"]
+                            else ""
+                        )
+
+                        url = (
+                            f"[{reference_text}]({reference_url})"
+                            if reference_url
+                            else reference_text
+                        )
+
+                        st.markdown(
+                            f"""
+                            - Description: {context['description']}
+                            - Reference: {url}
+                            """
+                        )
+                else:
+                    st.write("No significant event available for this anomaly.")
     else:
         st.info("No significant anomalies detected with current settings.")

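For review, a standalone sketch of the two numeric building blocks added above: the seq_length=24 sliding window (which is why preprocess_data returns clean_data.index[23:]) and the mapping from contamination to a percentile threshold. Only the windowing and the threshold line mirror the committed code; the errors array is a made-up stand-in for per-sequence reconstruction errors.

import numpy as np

def create_sequences(values, seq_length=24):
    # Same windowing as NYCTaxiAnomalyDetector.create_sequences:
    # N points yield N - seq_length + 1 overlapping windows of shape (seq_length, 1)
    values = values.reshape(-1, 1)
    return np.array(
        [values[i : i + seq_length] for i in range(len(values) - seq_length + 1)]
    )

data = np.arange(100, dtype=np.float32)
sequences = create_sequences(data)
print(sequences.shape)  # (77, 24, 1); the first window ends at index 23

# contamination=0.05 flags the worst 5% of reconstruction errors
contamination = 0.05
errors = np.random.rand(len(sequences))  # stand-in reconstruction errors
threshold = np.percentile(errors, (1 - contamination) * 100)
is_anomaly = (errors > threshold).astype(int)
print(is_anomaly.sum())  # about 5% of the windows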
src/config/config.py
ADDED
@@ -0,0 +1,10 @@
+import os
+import logging
+from path_config import LOG_DIR
+
+def setup_logging():
+    logging.basicConfig(
+        filename=os.path.join(LOG_DIR, 'train.log'),
+        level=logging.INFO,
+        format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
+    )
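A minimal usage sketch, assuming path_config.LOG_DIR exists and is importable as the other modules do. This root-logger configuration is what produces the "name - levelname - message" lines in src/logs/train.log below.

import logging
from src.config.config import setup_logging

setup_logging()                       # routes records to <LOG_DIR>/train.log
logger = logging.getLogger(__name__)  # module loggers inherit the root config
logger.info("Preprocessing started")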
src/inference.py
ADDED
@@ -0,0 +1,25 @@
+from src.config.config import setup_logging
+from src.pipeline import NYCDataLoader, VanillaLSTM, Transformer, VAE, AnomalyDetector
+
+
+def inference():
+
+    seq_length = 48
+
+    setup_logging()
+
+    # Load the preprocessed data
+    data_loader = NYCDataLoader(batch_size=32)
+    train_loader, _, test_loader = data_loader.load_data()
+
+    # Get the true anomalies
+    true_anomalies = data_loader.get_true_anomalies()
+
+    # Initialize the AnomalyDetector
+    detector = AnomalyDetector()
+
+    # Load the trained models
+    detector.load_data(test_loader=test_loader)
+    detector.load_trained_model("transformer_model.pth", model_type="transformer")
+
+
src/logs/train.log
ADDED
@@ -0,0 +1,300 @@
+2025-01-06 14:24:18,937 - pipeline.preprocesser - INFO - Loading raw data...
+2025-01-06 14:24:19,021 - pipeline.preprocesser - INFO - Detecting anomalies...
+2025-01-06 14:24:19,021 - pipeline.preprocesser - INFO - Labeling anomalies...
+2025-01-06 14:25:05,900 - pipeline.preprocesser - INFO - Loading raw data...
+2025-01-06 14:25:05,939 - pipeline.preprocesser - INFO - Detecting anomalies...
+2025-01-06 14:25:05,939 - pipeline.preprocesser - INFO - Labeling anomalies...
+2025-01-06 14:25:05,952 - pipeline.preprocesser - INFO - Splitting the data into training, validation and testing set...
+2025-01-06 14:25:05,954 - pipeline.preprocesser - INFO - Splitting the data into features and target...
+2025-01-06 14:25:05,956 - pipeline.preprocesser - INFO - Scaling the data...
+2025-01-06 14:25:05,962 - pipeline.preprocesser - INFO - Creating sliding window with the length of 48 from the data...
+2025-01-06 14:25:05,970 - pipeline.preprocesser - INFO - Saving the preprocessed data...
+2025-01-06 14:25:05,973 - pipeline.preprocesser - INFO - C:\Users\LENOVO\PycharmProjects\NeuroOrion_Time_Series_Anomaly_Detection\data\preprocessed_data\train_features.npy has been saved successfully!
+2025-01-06 14:25:05,974 - pipeline.preprocesser - INFO - C:\Users\LENOVO\PycharmProjects\NeuroOrion_Time_Series_Anomaly_Detection\data\preprocessed_data\val_features.npy has been saved successfully!
+2025-01-06 14:25:05,975 - pipeline.preprocesser - INFO - C:\Users\LENOVO\PycharmProjects\NeuroOrion_Time_Series_Anomaly_Detection\data\preprocessed_data\test_features.npy has been saved successfully!
+2025-01-06 14:25:05,976 - pipeline.preprocesser - INFO - C:\Users\LENOVO\PycharmProjects\NeuroOrion_Time_Series_Anomaly_Detection\data\preprocessed_data\train_labels.npy has been saved successfully!
+2025-01-06 14:25:05,977 - pipeline.preprocesser - INFO - C:\Users\LENOVO\PycharmProjects\NeuroOrion_Time_Series_Anomaly_Detection\data\preprocessed_data\val_labels.npy has been saved successfully!
+2025-01-06 14:25:05,978 - pipeline.preprocesser - INFO - C:\Users\LENOVO\PycharmProjects\NeuroOrion_Time_Series_Anomaly_Detection\data\preprocessed_data\test_labels.npy has been saved successfully!
+2025-01-06 14:30:56,989 - pipeline.preprocesser - INFO - Loading raw data...
+2025-01-06 14:30:57,016 - pipeline.preprocesser - INFO - Detecting anomalies...
+2025-01-06 14:30:57,016 - pipeline.preprocesser - INFO - Labeling anomalies...
+2025-01-06 14:30:57,026 - pipeline.preprocesser - INFO - Splitting the data into training, validation and testing set...
+2025-01-06 14:30:57,027 - pipeline.preprocesser - INFO - Splitting the data into features and target...
+2025-01-06 14:30:57,028 - pipeline.preprocesser - INFO - Scaling the data...
+2025-01-06 14:30:57,031 - pipeline.preprocesser - INFO - Creating sliding window with the length of 48 from the data...
+2025-01-06 14:30:57,038 - pipeline.preprocesser - INFO - Saving the preprocessed data...
+2025-01-06 14:30:57,040 - pipeline.preprocesser - INFO - C:\Users\LENOVO\PycharmProjects\NeuroOrion_Time_Series_Anomaly_Detection\data\preprocessed_data\train_features.npy has been saved successfully!
+2025-01-06 14:30:57,041 - pipeline.preprocesser - INFO - C:\Users\LENOVO\PycharmProjects\NeuroOrion_Time_Series_Anomaly_Detection\data\preprocessed_data\val_features.npy has been saved successfully!
+2025-01-06 14:30:57,042 - pipeline.preprocesser - INFO - C:\Users\LENOVO\PycharmProjects\NeuroOrion_Time_Series_Anomaly_Detection\data\preprocessed_data\test_features.npy has been saved successfully!
+2025-01-06 14:30:57,043 - pipeline.preprocesser - INFO - C:\Users\LENOVO\PycharmProjects\NeuroOrion_Time_Series_Anomaly_Detection\data\preprocessed_data\train_labels.npy has been saved successfully!
+2025-01-06 14:30:57,043 - pipeline.preprocesser - INFO - C:\Users\LENOVO\PycharmProjects\NeuroOrion_Time_Series_Anomaly_Detection\data\preprocessed_data\val_labels.npy has been saved successfully!
+2025-01-06 14:30:57,044 - pipeline.preprocesser - INFO - C:\Users\LENOVO\PycharmProjects\NeuroOrion_Time_Series_Anomaly_Detection\data\preprocessed_data\test_labels.npy has been saved successfully!
+2025-01-06 14:30:57,122 - pipeline.trainer - INFO - Initialize the model...
+2025-01-06 14:31:42,480 - pipeline.preprocesser - INFO - Loading raw data...
+2025-01-06 14:31:42,498 - pipeline.preprocesser - INFO - Detecting anomalies...
+2025-01-06 14:31:42,498 - pipeline.preprocesser - INFO - Labeling anomalies...
+2025-01-06 14:31:42,506 - pipeline.preprocesser - INFO - Splitting the data into training, validation and testing set...
+2025-01-06 14:31:42,508 - pipeline.preprocesser - INFO - Splitting the data into features and target...
+2025-01-06 14:31:42,509 - pipeline.preprocesser - INFO - Scaling the data...
+2025-01-06 14:31:42,512 - pipeline.preprocesser - INFO - Creating sliding window with the length of 48 from the data...
+2025-01-06 14:31:42,518 - pipeline.preprocesser - INFO - Saving the preprocessed data...
+2025-01-06 14:31:42,519 - pipeline.preprocesser - INFO - C:\Users\LENOVO\PycharmProjects\NeuroOrion_Time_Series_Anomaly_Detection\data\preprocessed_data\train_features.npy has been saved successfully!
+2025-01-06 14:31:42,521 - pipeline.preprocesser - INFO - C:\Users\LENOVO\PycharmProjects\NeuroOrion_Time_Series_Anomaly_Detection\data\preprocessed_data\val_features.npy has been saved successfully!
+2025-01-06 14:31:42,523 - pipeline.preprocesser - INFO - C:\Users\LENOVO\PycharmProjects\NeuroOrion_Time_Series_Anomaly_Detection\data\preprocessed_data\test_features.npy has been saved successfully!
+2025-01-06 14:31:42,524 - pipeline.preprocesser - INFO - C:\Users\LENOVO\PycharmProjects\NeuroOrion_Time_Series_Anomaly_Detection\data\preprocessed_data\train_labels.npy has been saved successfully!
+2025-01-06 14:31:42,525 - pipeline.preprocesser - INFO - C:\Users\LENOVO\PycharmProjects\NeuroOrion_Time_Series_Anomaly_Detection\data\preprocessed_data\val_labels.npy has been saved successfully!
+2025-01-06 14:31:42,525 - pipeline.preprocesser - INFO - C:\Users\LENOVO\PycharmProjects\NeuroOrion_Time_Series_Anomaly_Detection\data\preprocessed_data\test_labels.npy has been saved successfully!
+2025-01-06 14:31:42,584 - pipeline.trainer - INFO - Initialize the model...
+2025-01-06 14:32:10,258 - pipeline.preprocesser - INFO - Loading raw data...
+2025-01-06 14:32:10,275 - pipeline.preprocesser - INFO - Detecting anomalies...
+2025-01-06 14:32:10,275 - pipeline.preprocesser - INFO - Labeling anomalies...
+2025-01-06 14:32:10,284 - pipeline.preprocesser - INFO - Splitting the data into training, validation and testing set...
+2025-01-06 14:32:10,285 - pipeline.preprocesser - INFO - Splitting the data into features and target...
+2025-01-06 14:32:10,286 - pipeline.preprocesser - INFO - Scaling the data...
+2025-01-06 14:32:10,291 - pipeline.preprocesser - INFO - Creating sliding window with the length of 48 from the data...
+2025-01-06 14:32:10,297 - pipeline.preprocesser - INFO - Saving the preprocessed data...
+2025-01-06 14:32:10,307 - pipeline.preprocesser - INFO - C:\Users\LENOVO\PycharmProjects\NeuroOrion_Time_Series_Anomaly_Detection\data\preprocessed_data\train_features.npy has been saved successfully!
+2025-01-06 14:32:10,310 - pipeline.preprocesser - INFO - C:\Users\LENOVO\PycharmProjects\NeuroOrion_Time_Series_Anomaly_Detection\data\preprocessed_data\val_features.npy has been saved successfully!
+2025-01-06 14:32:10,314 - pipeline.preprocesser - INFO - C:\Users\LENOVO\PycharmProjects\NeuroOrion_Time_Series_Anomaly_Detection\data\preprocessed_data\test_features.npy has been saved successfully!
+2025-01-06 14:32:10,316 - pipeline.preprocesser - INFO - C:\Users\LENOVO\PycharmProjects\NeuroOrion_Time_Series_Anomaly_Detection\data\preprocessed_data\train_labels.npy has been saved successfully!
+2025-01-06 14:32:10,318 - pipeline.preprocesser - INFO - C:\Users\LENOVO\PycharmProjects\NeuroOrion_Time_Series_Anomaly_Detection\data\preprocessed_data\val_labels.npy has been saved successfully!
+2025-01-06 14:32:10,319 - pipeline.preprocesser - INFO - C:\Users\LENOVO\PycharmProjects\NeuroOrion_Time_Series_Anomaly_Detection\data\preprocessed_data\test_labels.npy has been saved successfully!
+2025-01-06 14:32:10,393 - pipeline.trainer - INFO - Initialize the model...
+2025-01-06 14:33:33,454 - pipeline.preprocesser - INFO - Loading raw data...
+2025-01-06 14:33:33,473 - pipeline.preprocesser - INFO - Detecting anomalies...
+2025-01-06 14:33:33,473 - pipeline.preprocesser - INFO - Labeling anomalies...
+2025-01-06 14:33:33,482 - pipeline.preprocesser - INFO - Splitting the data into training, validation and testing set...
+2025-01-06 14:33:33,482 - pipeline.preprocesser - INFO - Splitting the data into features and target...
+2025-01-06 14:33:33,483 - pipeline.preprocesser - INFO - Scaling the data...
+2025-01-06 14:33:33,488 - pipeline.preprocesser - INFO - Creating sliding window with the length of 48 from the data...
+2025-01-06 14:33:33,494 - pipeline.preprocesser - INFO - Saving the preprocessed data...
+2025-01-06 14:33:33,498 - pipeline.preprocesser - INFO - C:\Users\LENOVO\PycharmProjects\NeuroOrion_Time_Series_Anomaly_Detection\data\preprocessed_data\train_features.npy has been saved successfully!
+2025-01-06 14:33:33,499 - pipeline.preprocesser - INFO - C:\Users\LENOVO\PycharmProjects\NeuroOrion_Time_Series_Anomaly_Detection\data\preprocessed_data\val_features.npy has been saved successfully!
+2025-01-06 14:33:33,501 - pipeline.preprocesser - INFO - C:\Users\LENOVO\PycharmProjects\NeuroOrion_Time_Series_Anomaly_Detection\data\preprocessed_data\test_features.npy has been saved successfully!
+2025-01-06 14:33:33,501 - pipeline.preprocesser - INFO - C:\Users\LENOVO\PycharmProjects\NeuroOrion_Time_Series_Anomaly_Detection\data\preprocessed_data\train_labels.npy has been saved successfully!
+2025-01-06 14:33:33,502 - pipeline.preprocesser - INFO - C:\Users\LENOVO\PycharmProjects\NeuroOrion_Time_Series_Anomaly_Detection\data\preprocessed_data\val_labels.npy has been saved successfully!
+2025-01-06 14:33:33,502 - pipeline.preprocesser - INFO - C:\Users\LENOVO\PycharmProjects\NeuroOrion_Time_Series_Anomaly_Detection\data\preprocessed_data\test_labels.npy has been saved successfully!
+2025-01-06 14:33:53,220 - pipeline.preprocesser - INFO - Loading raw data...
+2025-01-06 14:33:53,240 - pipeline.preprocesser - INFO - Detecting anomalies...
+2025-01-06 14:33:53,240 - pipeline.preprocesser - INFO - Labeling anomalies...
+2025-01-06 14:33:53,247 - pipeline.preprocesser - INFO - Splitting the data into training, validation and testing set...
+2025-01-06 14:33:53,248 - pipeline.preprocesser - INFO - Splitting the data into features and target...
+2025-01-06 14:33:53,249 - pipeline.preprocesser - INFO - Scaling the data...
+2025-01-06 14:33:53,253 - pipeline.preprocesser - INFO - Creating sliding window with the length of 48 from the data...
+2025-01-06 14:33:53,260 - pipeline.preprocesser - INFO - Saving the preprocessed data...
+2025-01-06 14:33:53,261 - pipeline.preprocesser - INFO - C:\Users\LENOVO\PycharmProjects\NeuroOrion_Time_Series_Anomaly_Detection\data\preprocessed_data\train_features.npy has been saved successfully!
+2025-01-06 14:33:53,263 - pipeline.preprocesser - INFO - C:\Users\LENOVO\PycharmProjects\NeuroOrion_Time_Series_Anomaly_Detection\data\preprocessed_data\val_features.npy has been saved successfully!
+2025-01-06 14:33:53,264 - pipeline.preprocesser - INFO - C:\Users\LENOVO\PycharmProjects\NeuroOrion_Time_Series_Anomaly_Detection\data\preprocessed_data\test_features.npy has been saved successfully!
+2025-01-06 14:33:53,265 - pipeline.preprocesser - INFO - C:\Users\LENOVO\PycharmProjects\NeuroOrion_Time_Series_Anomaly_Detection\data\preprocessed_data\train_labels.npy has been saved successfully!
+2025-01-06 14:33:53,266 - pipeline.preprocesser - INFO - C:\Users\LENOVO\PycharmProjects\NeuroOrion_Time_Series_Anomaly_Detection\data\preprocessed_data\val_labels.npy has been saved successfully!
+2025-01-06 14:33:53,267 - pipeline.preprocesser - INFO - C:\Users\LENOVO\PycharmProjects\NeuroOrion_Time_Series_Anomaly_Detection\data\preprocessed_data\test_labels.npy has been saved successfully!
+2025-01-06 14:33:53,320 - pipeline.trainer - INFO - Initialize the model...
+2025-01-06 14:33:53,320 - pipeline.trainer - INFO - Configure the training parameters...
+2025-01-06 14:33:55,517 - pipeline.trainer - INFO - Start training...
+2025-01-06 14:33:56,930 - pipeline.trainer - INFO - Epoch 1/20, Train Loss: 0.3034, Val Loss: 0.0795
+2025-01-06 14:33:58,164 - pipeline.trainer - INFO - Epoch 2/20, Train Loss: 0.0653, Val Loss: 0.0455
+2025-01-06 14:33:59,489 - pipeline.trainer - INFO - Epoch 3/20, Train Loss: 0.0439, Val Loss: 0.0350
+2025-01-06 14:34:00,754 - pipeline.trainer - INFO - Epoch 4/20, Train Loss: 0.0357, Val Loss: 0.0351
+2025-01-06 14:34:02,119 - pipeline.trainer - INFO - Epoch 5/20, Train Loss: 0.0337, Val Loss: 0.0251
+2025-01-06 14:34:03,447 - pipeline.trainer - INFO - Epoch 6/20, Train Loss: 0.0293, Val Loss: 0.0248
+2025-01-06 14:34:04,771 - pipeline.trainer - INFO - Epoch 7/20, Train Loss: 0.0268, Val Loss: 0.0235
+2025-01-06 14:34:06,231 - pipeline.trainer - INFO - Epoch 8/20, Train Loss: 0.0273, Val Loss: 0.0215
+2025-01-06 14:34:07,567 - pipeline.trainer - INFO - Epoch 9/20, Train Loss: 0.0244, Val Loss: 0.0182
+2025-01-06 14:34:08,960 - pipeline.trainer - INFO - Epoch 10/20, Train Loss: 0.0242, Val Loss: 0.0190
+2025-01-06 14:34:10,369 - pipeline.trainer - INFO - Epoch 11/20, Train Loss: 0.0234, Val Loss: 0.0340
+2025-01-06 14:34:11,813 - pipeline.trainer - INFO - Epoch 12/20, Train Loss: 0.0214, Val Loss: 0.0185
+2025-01-06 14:34:13,487 - pipeline.trainer - INFO - Epoch 13/20, Train Loss: 0.0204, Val Loss: 0.0165
+2025-01-06 14:34:15,658 - pipeline.trainer - INFO - Epoch 14/20, Train Loss: 0.0202, Val Loss: 0.0166
+2025-01-06 14:34:17,142 - pipeline.trainer - INFO - Epoch 15/20, Train Loss: 0.0194, Val Loss: 0.0156
+2025-01-06 14:34:18,893 - pipeline.trainer - INFO - Epoch 16/20, Train Loss: 0.0184, Val Loss: 0.0183
+2025-01-06 14:34:20,405 - pipeline.trainer - INFO - Epoch 17/20, Train Loss: 0.0181, Val Loss: 0.0166
+2025-01-06 14:34:21,905 - pipeline.trainer - INFO - Epoch 18/20, Train Loss: 0.0177, Val Loss: 0.0164
+2025-01-06 14:34:23,342 - pipeline.trainer - INFO - Epoch 19/20, Train Loss: 0.0173, Val Loss: 0.0163
+2025-01-06 14:34:24,784 - pipeline.trainer - INFO - Epoch 20/20, Train Loss: 0.0169, Val Loss: 0.0137
+2025-01-06 14:34:24,784 - pipeline.trainer - INFO - Training completed!
+2025-01-06 14:34:24,785 - pipeline.trainer - INFO - Initialize the model...
+2025-01-06 14:34:24,786 - pipeline.trainer - INFO - Configure the training parameters...
+2025-01-06 14:34:24,786 - pipeline.trainer - INFO - Start training...
+2025-01-06 14:34:26,589 - pipeline.trainer - INFO - Epoch 1/20, Train Loss: 1.0037, Val Loss: 1.1303
+2025-01-06 14:34:28,275 - pipeline.trainer - INFO - Epoch 2/20, Train Loss: 0.9997, Val Loss: 1.1426
+2025-01-06 14:34:30,034 - pipeline.trainer - INFO - Epoch 3/20, Train Loss: 0.9988, Val Loss: 1.1378
+2025-01-06 14:34:31,756 - pipeline.trainer - INFO - Epoch 4/20, Train Loss: 0.9897, Val Loss: 1.0285
+2025-01-06 14:34:33,542 - pipeline.trainer - INFO - Epoch 5/20, Train Loss: 0.8906, Val Loss: 0.9805
+2025-01-06 14:34:35,897 - pipeline.trainer - INFO - Epoch 6/20, Train Loss: 0.8219, Val Loss: 0.8182
+2025-01-06 14:34:37,835 - pipeline.trainer - INFO - Epoch 7/20, Train Loss: 0.7519, Val Loss: 0.7566
+2025-01-06 14:34:39,565 - pipeline.trainer - INFO - Epoch 8/20, Train Loss: 0.7407, Val Loss: 0.7998
+2025-01-06 14:34:41,248 - pipeline.trainer - INFO - Epoch 9/20, Train Loss: 0.7306, Val Loss: 0.7671
+2025-01-06 14:34:43,010 - pipeline.trainer - INFO - Epoch 10/20, Train Loss: 0.7294, Val Loss: 0.7305
+2025-01-06 14:34:44,642 - pipeline.trainer - INFO - Epoch 11/20, Train Loss: 0.7223, Val Loss: 0.7628
+2025-01-06 14:34:46,323 - pipeline.trainer - INFO - Epoch 12/20, Train Loss: 0.7174, Val Loss: 0.7732
+2025-01-06 14:34:48,097 - pipeline.trainer - INFO - Epoch 13/20, Train Loss: 0.7136, Val Loss: 0.7379
+2025-01-06 14:34:49,784 - pipeline.trainer - INFO - Epoch 14/20, Train Loss: 0.7142, Val Loss: 0.7372
+2025-01-06 14:34:51,476 - pipeline.trainer - INFO - Epoch 15/20, Train Loss: 0.7131, Val Loss: 0.7190
+2025-01-06 14:34:53,172 - pipeline.trainer - INFO - Epoch 16/20, Train Loss: 0.7203, Val Loss: 0.7440
+2025-01-06 14:34:54,822 - pipeline.trainer - INFO - Epoch 17/20, Train Loss: 0.7083, Val Loss: 0.7466
+2025-01-06 14:34:56,503 - pipeline.trainer - INFO - Epoch 18/20, Train Loss: 0.7139, Val Loss: 0.7061
+2025-01-06 14:34:58,186 - pipeline.trainer - INFO - Epoch 19/20, Train Loss: 0.7102, Val Loss: 0.7272
+2025-01-06 14:34:59,852 - pipeline.trainer - INFO - Epoch 20/20, Train Loss: 0.7139, Val Loss: 0.7414
+2025-01-06 14:34:59,852 - pipeline.trainer - INFO - Training completed!
+2025-01-06 14:34:59,871 - pipeline.trainer - INFO - Initialize the model...
+2025-01-06 14:34:59,871 - pipeline.trainer - INFO - Configure the training parameters...
+2025-01-06 14:34:59,872 - pipeline.trainer - INFO - Start training...
+2025-01-06 14:35:08,324 - pipeline.trainer - INFO - Epoch 1/5, Train Loss: 0.0648, Val Loss: 0.0018
+2025-01-06 14:35:15,897 - pipeline.trainer - INFO - Epoch 2/5, Train Loss: 0.0059, Val Loss: 0.0024
+2025-01-06 14:35:23,713 - pipeline.trainer - INFO - Epoch 3/5, Train Loss: 0.0032, Val Loss: 0.0012
+2025-01-06 14:35:31,344 - pipeline.trainer - INFO - Epoch 4/5, Train Loss: 0.0022, Val Loss: 0.0005
+2025-01-06 14:35:39,079 - pipeline.trainer - INFO - Epoch 5/5, Train Loss: 0.0016, Val Loss: 0.0002
+2025-01-06 14:35:39,079 - pipeline.trainer - INFO - Training completed!
+2025-01-06 19:00:28,816 - pipeline.preprocesser - INFO - Loading raw data...
+2025-01-06 19:00:28,853 - pipeline.preprocesser - INFO - Detecting anomalies...
+2025-01-06 19:00:28,853 - pipeline.preprocesser - INFO - Labeling anomalies...
+2025-01-06 19:00:28,876 - pipeline.preprocesser - INFO - Splitting the data into training, validation and testing set...
+2025-01-06 19:00:28,879 - pipeline.preprocesser - INFO - Splitting the data into features and target...
+2025-01-06 19:00:28,881 - pipeline.preprocesser - INFO - Scaling the data...
+2025-01-06 19:00:28,889 - pipeline.preprocesser - INFO - Creating sliding window with the length of 24 from the data...
+2025-01-06 19:00:28,896 - pipeline.preprocesser - INFO - Saving the preprocessed data...
+2025-01-06 19:00:28,898 - pipeline.preprocesser - INFO - C:\Users\LENOVO\PycharmProjects\NeuroOrion_Time_Series_Anomaly_Detection\data\preprocessed_data\train_features.npy has been saved successfully!
+2025-01-06 19:00:28,899 - pipeline.preprocesser - INFO - C:\Users\LENOVO\PycharmProjects\NeuroOrion_Time_Series_Anomaly_Detection\data\preprocessed_data\val_features.npy has been saved successfully!
+2025-01-06 19:00:28,900 - pipeline.preprocesser - INFO - C:\Users\LENOVO\PycharmProjects\NeuroOrion_Time_Series_Anomaly_Detection\data\preprocessed_data\test_features.npy has been saved successfully!
+2025-01-06 19:00:28,901 - pipeline.preprocesser - INFO - C:\Users\LENOVO\PycharmProjects\NeuroOrion_Time_Series_Anomaly_Detection\data\preprocessed_data\train_labels.npy has been saved successfully!
+2025-01-06 19:00:28,902 - pipeline.preprocesser - INFO - C:\Users\LENOVO\PycharmProjects\NeuroOrion_Time_Series_Anomaly_Detection\data\preprocessed_data\val_labels.npy has been saved successfully!
+2025-01-06 19:00:28,903 - pipeline.preprocesser - INFO - C:\Users\LENOVO\PycharmProjects\NeuroOrion_Time_Series_Anomaly_Detection\data\preprocessed_data\test_labels.npy has been saved successfully!
+2025-01-06 19:00:28,986 - pipeline.trainer - INFO - Initialize the model...
+2025-01-06 19:00:28,987 - pipeline.trainer - INFO - Configure the training parameters...
+2025-01-06 19:00:31,247 - pipeline.trainer - INFO - Start training...
+2025-01-06 19:00:32,247 - pipeline.trainer - INFO - Epoch 1/20, Train Loss: 0.2837, Val Loss: 0.0671
+2025-01-06 19:00:33,049 - pipeline.trainer - INFO - Epoch 2/20, Train Loss: 0.0577, Val Loss: 0.0383
+2025-01-06 19:00:34,549 - pipeline.trainer - INFO - Epoch 3/20, Train Loss: 0.0440, Val Loss: 0.0301
+2025-01-06 19:00:35,469 - pipeline.trainer - INFO - Epoch 4/20, Train Loss: 0.0352, Val Loss: 0.0342
+2025-01-06 19:00:36,504 - pipeline.trainer - INFO - Epoch 5/20, Train Loss: 0.0309, Val Loss: 0.0212
+2025-01-06 19:00:37,677 - pipeline.trainer - INFO - Epoch 6/20, Train Loss: 0.0281, Val Loss: 0.0234
+2025-01-06 19:00:39,663 - pipeline.trainer - INFO - Epoch 7/20, Train Loss: 0.0254, Val Loss: 0.0234
+2025-01-06 19:00:41,030 - pipeline.trainer - INFO - Epoch 8/20, Train Loss: 0.0242, Val Loss: 0.0203
+2025-01-06 19:00:42,704 - pipeline.trainer - INFO - Epoch 9/20, Train Loss: 0.0233, Val Loss: 0.0263
+2025-01-06 19:00:44,501 - pipeline.trainer - INFO - Epoch 10/20, Train Loss: 0.0226, Val Loss: 0.0194
+2025-01-06 19:00:45,647 - pipeline.trainer - INFO - Epoch 11/20, Train Loss: 0.0215, Val Loss: 0.0191
+2025-01-06 19:00:47,411 - pipeline.trainer - INFO - Epoch 12/20, Train Loss: 0.0223, Val Loss: 0.0179
+2025-01-06 19:00:48,595 - pipeline.trainer - INFO - Epoch 13/20, Train Loss: 0.0214, Val Loss: 0.0223
+2025-01-06 19:00:49,564 - pipeline.trainer - INFO - Epoch 14/20, Train Loss: 0.0200, Val Loss: 0.0178
+2025-01-06 19:00:50,591 - pipeline.trainer - INFO - Epoch 15/20, Train Loss: 0.0193, Val Loss: 0.0186
+2025-01-06 19:00:51,548 - pipeline.trainer - INFO - Epoch 16/20, Train Loss: 0.0187, Val Loss: 0.0172
+2025-01-06 19:00:52,543 - pipeline.trainer - INFO - Epoch 17/20, Train Loss: 0.0184, Val Loss: 0.0159
+2025-01-06 19:00:53,596 - pipeline.trainer - INFO - Epoch 18/20, Train Loss: 0.0178, Val Loss: 0.0173
+2025-01-06 19:00:54,580 - pipeline.trainer - INFO - Epoch 19/20, Train Loss: 0.0175, Val Loss: 0.0154
+2025-01-06 19:00:55,473 - pipeline.trainer - INFO - Epoch 20/20, Train Loss: 0.0169, Val Loss: 0.0169
+2025-01-06 19:00:55,473 - pipeline.trainer - INFO - Training completed!
+2025-01-06 19:00:55,475 - pipeline.trainer - INFO - Initialize the model...
+2025-01-06 19:00:55,476 - pipeline.trainer - INFO - Configure the training parameters...
+2025-01-06 19:00:55,477 - pipeline.trainer - INFO - Start training...
+2025-01-06 19:02:52,646 - pipeline.preprocesser - INFO - Loading raw data...
+2025-01-06 19:02:52,671 - pipeline.preprocesser - INFO - Detecting anomalies...
+2025-01-06 19:02:52,671 - pipeline.preprocesser - INFO - Labeling anomalies...
+2025-01-06 19:02:52,680 - pipeline.preprocesser - INFO - Splitting the data into training, validation and testing set...
+2025-01-06 19:02:52,681 - pipeline.preprocesser - INFO - Splitting the data into features and target...
+2025-01-06 19:02:52,682 - pipeline.preprocesser - INFO - Scaling the data...
+2025-01-06 19:02:52,686 - pipeline.preprocesser - INFO - Creating sliding window with the length of 24 from the data...
+2025-01-06 19:02:52,693 - pipeline.preprocesser - INFO - Saving the preprocessed data...
+2025-01-06 19:02:52,694 - pipeline.preprocesser - INFO - C:\Users\LENOVO\PycharmProjects\NeuroOrion_Time_Series_Anomaly_Detection\data\preprocessed_data\train_features.npy has been saved successfully!
+2025-01-06 19:02:52,695 - pipeline.preprocesser - INFO - C:\Users\LENOVO\PycharmProjects\NeuroOrion_Time_Series_Anomaly_Detection\data\preprocessed_data\val_features.npy has been saved successfully!
+2025-01-06 19:02:52,696 - pipeline.preprocesser - INFO - C:\Users\LENOVO\PycharmProjects\NeuroOrion_Time_Series_Anomaly_Detection\data\preprocessed_data\test_features.npy has been saved successfully!
+2025-01-06 19:02:52,697 - pipeline.preprocesser - INFO - C:\Users\LENOVO\PycharmProjects\NeuroOrion_Time_Series_Anomaly_Detection\data\preprocessed_data\train_labels.npy has been saved successfully!
+2025-01-06 19:02:52,697 - pipeline.preprocesser - INFO - C:\Users\LENOVO\PycharmProjects\NeuroOrion_Time_Series_Anomaly_Detection\data\preprocessed_data\val_labels.npy has been saved successfully!
+2025-01-06 19:02:52,698 - pipeline.preprocesser - INFO - C:\Users\LENOVO\PycharmProjects\NeuroOrion_Time_Series_Anomaly_Detection\data\preprocessed_data\test_labels.npy has been saved successfully!
+2025-01-06 19:02:52,782 - pipeline.trainer - INFO - Initialize the model...
+2025-01-06 19:02:52,784 - pipeline.trainer - INFO - Configure the training parameters...
+2025-01-06 19:02:54,413 - pipeline.trainer - INFO - Start training...
+2025-01-06 19:02:55,398 - pipeline.trainer - INFO - Epoch 1/20, Train Loss: 0.2684, Val Loss: 0.0641
+2025-01-06 19:02:56,284 - pipeline.trainer - INFO - Epoch 2/20, Train Loss: 0.0561, Val Loss: 0.0374
+2025-01-06 19:02:57,174 - pipeline.trainer - INFO - Epoch 3/20, Train Loss: 0.0403, Val Loss: 0.0275
+2025-01-06 19:02:58,048 - pipeline.trainer - INFO - Epoch 4/20, Train Loss: 0.0344, Val Loss: 0.0240
+2025-01-06 19:02:59,059 - pipeline.trainer - INFO - Epoch 5/20, Train Loss: 0.0273, Val Loss: 0.0220
+2025-01-06 19:03:00,078 - pipeline.trainer - INFO - Epoch 6/20, Train Loss: 0.0267, Val Loss: 0.0213
+2025-01-06 19:03:01,054 - pipeline.trainer - INFO - Epoch 7/20, Train Loss: 0.0242, Val Loss: 0.0194
+2025-01-06 19:03:02,053 - pipeline.trainer - INFO - Epoch 8/20, Train Loss: 0.0240, Val Loss: 0.0184
+2025-01-06 19:03:02,954 - pipeline.trainer - INFO - Epoch 9/20, Train Loss: 0.0222, Val Loss: 0.0183
+2025-01-06 19:03:03,976 - pipeline.trainer - INFO - Epoch 10/20, Train Loss: 0.0218, Val Loss: 0.0200
+2025-01-06 19:03:04,897 - pipeline.trainer - INFO - Epoch 11/20, Train Loss: 0.0211, Val Loss: 0.0181
+2025-01-06 19:03:05,807 - pipeline.trainer - INFO - Epoch 12/20, Train Loss: 0.0197, Val Loss: 0.0171
+2025-01-06 19:03:07,057 - pipeline.trainer - INFO - Epoch 13/20, Train Loss: 0.0197, Val Loss: 0.0185
+2025-01-06 19:03:08,468 - pipeline.trainer - INFO - Epoch 14/20, Train Loss: 0.0191, Val Loss: 0.0181
+2025-01-06 19:03:09,581 - pipeline.trainer - INFO - Epoch 15/20, Train Loss: 0.0188, Val Loss: 0.0186
+2025-01-06 19:03:10,485 - pipeline.trainer - INFO - Epoch 16/20, Train Loss: 0.0185, Val Loss: 0.0185
+2025-01-06 19:03:11,373 - pipeline.trainer - INFO - Epoch 17/20, Train Loss: 0.0181, Val Loss: 0.0164
+2025-01-06 19:03:12,261 - pipeline.trainer - INFO - Epoch 18/20, Train Loss: 0.0181, Val Loss: 0.0179
+2025-01-06 19:03:13,120 - pipeline.trainer - INFO - Epoch 19/20, Train Loss: 0.0170, Val Loss: 0.0153
+2025-01-06 19:03:14,041 - pipeline.trainer - INFO - Epoch 20/20, Train Loss: 0.0166, Val Loss: 0.0141
+2025-01-06 19:03:14,041 - pipeline.trainer - INFO - Training completed!
+2025-01-06 19:03:29,028 - pipeline.preprocesser - INFO - Loading raw data...
+2025-01-06 19:03:29,058 - pipeline.preprocesser - INFO - Detecting anomalies...
+2025-01-06 19:03:29,058 - pipeline.preprocesser - INFO - Labeling anomalies...
+2025-01-06 19:03:29,069 - pipeline.preprocesser - INFO - Splitting the data into training, validation and testing set...
+2025-01-06 19:03:29,070 - pipeline.preprocesser - INFO - Splitting the data into features and target...
+2025-01-06 19:03:29,072 - pipeline.preprocesser - INFO - Scaling the data...
+2025-01-06 19:03:29,077 - pipeline.preprocesser - INFO - Creating sliding window with the length of 24 from the data...
+2025-01-06 19:03:29,087 - pipeline.preprocesser - INFO - Saving the preprocessed data...
+2025-01-06 19:03:29,088 - pipeline.preprocesser - INFO - C:\Users\LENOVO\PycharmProjects\NeuroOrion_Time_Series_Anomaly_Detection\data\preprocessed_data\train_features.npy has been saved successfully!
+2025-01-06 19:03:29,089 - pipeline.preprocesser - INFO - C:\Users\LENOVO\PycharmProjects\NeuroOrion_Time_Series_Anomaly_Detection\data\preprocessed_data\val_features.npy has been saved successfully!
+2025-01-06 19:03:29,092 - pipeline.preprocesser - INFO - C:\Users\LENOVO\PycharmProjects\NeuroOrion_Time_Series_Anomaly_Detection\data\preprocessed_data\test_features.npy has been saved successfully!
+2025-01-06 19:03:29,093 - pipeline.preprocesser - INFO - C:\Users\LENOVO\PycharmProjects\NeuroOrion_Time_Series_Anomaly_Detection\data\preprocessed_data\train_labels.npy has been saved successfully!
+2025-01-06 19:03:29,093 - pipeline.preprocesser - INFO - C:\Users\LENOVO\PycharmProjects\NeuroOrion_Time_Series_Anomaly_Detection\data\preprocessed_data\val_labels.npy has been saved successfully!
+2025-01-06 19:03:29,094 - pipeline.preprocesser - INFO - C:\Users\LENOVO\PycharmProjects\NeuroOrion_Time_Series_Anomaly_Detection\data\preprocessed_data\test_labels.npy has been saved successfully!
+2025-01-06 19:03:29,156 - pipeline.trainer - INFO - Initialize the model...
+2025-01-06 19:03:29,157 - pipeline.trainer - INFO - Configure the training parameters...
+2025-01-06 19:03:30,542 - pipeline.trainer - INFO - Start training...
+2025-01-06 19:03:31,393 - pipeline.trainer - INFO - Epoch 1/20, Train Loss: 0.2989, Val Loss: 0.0746
+2025-01-06 19:03:32,272 - pipeline.trainer - INFO - Epoch 2/20, Train Loss: 0.0619, Val Loss: 0.0432
+2025-01-06 19:03:33,148 - pipeline.trainer - INFO - Epoch 3/20, Train Loss: 0.0434, Val Loss: 0.0294
+2025-01-06 19:03:34,056 - pipeline.trainer - INFO - Epoch 4/20, Train Loss: 0.0372, Val Loss: 0.0289
+2025-01-06 19:03:34,932 - pipeline.trainer - INFO - Epoch 5/20, Train Loss: 0.0334, Val Loss: 0.0241
+2025-01-06 19:03:35,838 - pipeline.trainer - INFO - Epoch 6/20, Train Loss: 0.0306, Val Loss: 0.0254
+2025-01-06 19:03:36,718 - pipeline.trainer - INFO - Epoch 7/20, Train Loss: 0.0296, Val Loss: 0.0218
+2025-01-06 19:03:37,602 - pipeline.trainer - INFO - Epoch 8/20, Train Loss: 0.0263, Val Loss: 0.0228
+2025-01-06 19:03:38,475 - pipeline.trainer - INFO - Epoch 9/20, Train Loss: 0.0253, Val Loss: 0.0266
+2025-01-06 19:03:39,344 - pipeline.trainer - INFO - Epoch 10/20, Train Loss: 0.0247, Val Loss: 0.0200
+2025-01-06 19:03:40,207 - pipeline.trainer - INFO - Epoch 11/20, Train Loss: 0.0234, Val Loss: 0.0206
+2025-01-06 19:03:41,117 - pipeline.trainer - INFO - Epoch 12/20, Train Loss: 0.0224, Val Loss: 0.0198
+2025-01-06 19:03:41,980 - pipeline.trainer - INFO - Epoch 13/20, Train Loss: 0.0220, Val Loss: 0.0188
+2025-01-06 19:03:42,848 - pipeline.trainer - INFO - Epoch 14/20, Train Loss: 0.0215, Val Loss: 0.0194
+2025-01-06 19:03:43,749 - pipeline.trainer - INFO - Epoch 15/20, Train Loss: 0.0210, Val Loss: 0.0217
+2025-01-06 19:03:44,659 - pipeline.trainer - INFO - Epoch 16/20, Train Loss: 0.0204, Val Loss: 0.0208
+2025-01-06 19:03:45,532 - pipeline.trainer - INFO - Epoch 17/20, Train Loss: 0.0207, Val Loss: 0.0208
+2025-01-06 19:03:46,410 - pipeline.trainer - INFO - Epoch 18/20, Train Loss: 0.0204, Val Loss: 0.0252
+2025-01-06 19:03:47,283 - pipeline.trainer - INFO - Epoch 19/20, Train Loss: 0.0204, Val Loss: 0.0193
+2025-01-06 19:03:48,166 - pipeline.trainer - INFO - Epoch 20/20, Train Loss: 0.0191, Val Loss: 0.0184
+2025-01-06 19:03:48,166 - pipeline.trainer - INFO - Training completed!
+2025-01-06 19:03:48,168 - pipeline.trainer - INFO - Initialize the model...
+2025-01-06 19:03:48,168 - pipeline.trainer - INFO - Configure the training parameters...
+2025-01-06 19:03:48,169 - pipeline.trainer - INFO - Start training...
+2025-01-06 19:03:49,381 - pipeline.trainer - INFO - Epoch 1/20, Train Loss: 1.0015, Val Loss: 1.0956
+2025-01-06 19:03:50,607 - pipeline.trainer - INFO - Epoch 2/20, Train Loss: 0.8309, Val Loss: 0.8594
+2025-01-06 19:03:52,089 - pipeline.trainer - INFO - Epoch 3/20, Train Loss: 0.7853, Val Loss: 0.8332
+2025-01-06 19:03:53,445 - pipeline.trainer - INFO - Epoch 4/20, Train Loss: 0.7632, Val Loss: 0.8482
+2025-01-06 19:03:54,595 - pipeline.trainer - INFO - Epoch 5/20, Train Loss: 0.7387, Val Loss: 0.7666
+2025-01-06 19:03:55,804 - pipeline.trainer - INFO - Epoch 6/20, Train Loss: 0.6963, Val Loss: 0.6869
+2025-01-06 19:03:56,969 - pipeline.trainer - INFO - Epoch 7/20, Train Loss: 0.6593, Val Loss: 0.6919
+2025-01-06 19:03:58,135 - pipeline.trainer - INFO - Epoch 8/20, Train Loss: 0.6531, Val Loss: 0.6691
+2025-01-06 19:03:59,274 - pipeline.trainer - INFO - Epoch 9/20, Train Loss: 0.6444, Val Loss: 0.6625
+2025-01-06 19:04:00,445 - pipeline.trainer - INFO - Epoch 10/20, Train Loss: 0.6473, Val Loss: 0.6644
+2025-01-06 19:04:01,577 - pipeline.trainer - INFO - Epoch 11/20, Train Loss: 0.6338, Val Loss: 0.6757
+2025-01-06 19:04:02,737 - pipeline.trainer - INFO - Epoch 12/20, Train Loss: 0.6356, Val Loss: 0.6671
+2025-01-06 19:04:03,890 - pipeline.trainer - INFO - Epoch 13/20, Train Loss: 0.6390, Val Loss: 0.6591
+2025-01-06 19:04:05,140 - pipeline.trainer - INFO - Epoch 14/20, Train Loss: 0.6335, Val Loss: 0.6530
+2025-01-06 19:04:06,665 - pipeline.trainer - INFO - Epoch 15/20, Train Loss: 0.6349, Val Loss: 0.6527
+2025-01-06 19:04:08,485 - pipeline.trainer - INFO - Epoch 16/20, Train Loss: 0.6383, Val Loss: 0.6734
+2025-01-06 19:04:09,881 - pipeline.trainer - INFO - Epoch 17/20, Train Loss: 0.6310, Val Loss: 0.6583
+2025-01-06 19:04:11,073 - pipeline.trainer - INFO - Epoch 18/20, Train Loss: 0.6340, Val Loss: 0.6496
+2025-01-06 19:04:12,325 - pipeline.trainer - INFO - Epoch 19/20, Train Loss: 0.6363, Val Loss: 0.6392
+2025-01-06 19:04:13,552 - pipeline.trainer - INFO - Epoch 20/20, Train Loss: 0.6296, Val Loss: 0.6535
+2025-01-06 19:04:13,552 - pipeline.trainer - INFO - Training completed!
+2025-01-06 19:04:13,561 - pipeline.trainer - INFO - Initialize the model...
+2025-01-06 19:04:13,562 - pipeline.trainer - INFO - Configure the training parameters...
+2025-01-06 19:04:13,562 - pipeline.trainer - INFO - Start training...
+2025-01-06 19:04:18,489 - pipeline.trainer - INFO - Epoch 1/5, Train Loss: 0.0816, Val Loss: 0.0026
+2025-01-06 19:04:23,306 - pipeline.trainer - INFO - Epoch 2/5, Train Loss: 0.0082, Val Loss: 0.0021
+2025-01-06 19:04:28,844 - pipeline.trainer - INFO - Epoch 3/5, Train Loss: 0.0046, Val Loss: 0.0012
+2025-01-06 19:04:33,885 - pipeline.trainer - INFO - Epoch 4/5, Train Loss: 0.0031, Val Loss: 0.0006
+2025-01-06 19:04:38,925 - pipeline.trainer - INFO - Epoch 5/5, Train Loss: 0.0024, Val Loss: 0.0007
+2025-01-06 19:04:38,925 - pipeline.trainer - INFO - Training completed!
+2025-01-06 19:04:38,939 - root - INFO - Model saved at C:\Users\LENOVO\PycharmProjects\NeuroOrion_Time_Series_Anomaly_Detection\models\lstm_model_small.pth
+2025-01-06 19:04:38,943 - root - INFO - Model saved at C:\Users\LENOVO\PycharmProjects\NeuroOrion_Time_Series_Anomaly_Detection\models\vae_model_small.pth
+2025-01-06 19:04:38,951 - root - INFO - Model saved at C:\Users\LENOVO\PycharmProjects\NeuroOrion_Time_Series_Anomaly_Detection\models\transformer_model_small.pth
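The log captures several runs: preprocessing with a sliding window of 48 and later 24, followed by trainings whose checkpoints are saved as the lstm, vae, and transformer _small.pth models. A small sketch (the path matches the filename configured in src/config/config.py) for extracting the loss curves from lines shaped like "Epoch 3/20, Train Loss: 0.0439, Val Loss: 0.0350":

import re

# Matches the trainer's per-epoch lines, e.g.
# "... - pipeline.trainer - INFO - Epoch 3/20, Train Loss: 0.0439, Val Loss: 0.0350"
pattern = re.compile(r"Epoch (\d+)/(\d+), Train Loss: ([\d.]+), Val Loss: ([\d.]+)")

with open("src/logs/train.log") as f:
    for line in f:
        m = pattern.search(line)
        if m:
            epoch, total, train_loss, val_loss = m.groups()
            print(int(epoch), int(total), float(train_loss), float(val_loss))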
src/pipeline/__init__.py
ADDED
@@ -0,0 +1,7 @@
+from .dataloader import NYCDataLoader
+from .preprocesser import Preprocessor
+from .model import (VanillaLSTM,
+                    Transformer,
+                    VAE)
+from .trainer import Trainer
+from .utils import save_model
src/pipeline/dataloader.py
ADDED
@@ -0,0 +1,65 @@
# Class for loading data from the dataset
import logging

import numpy as np
import torch
from torch.utils.data import DataLoader

from path_config import (
    TRAIN_FEATURES_PATH,
    TRAIN_LABELS_PATH,
    VAL_FEATURES_PATH,
    VAL_LABELS_PATH,
    TEST_FEATURES_PATH,
    TEST_LABELS_PATH,
)


class NYCDataLoader:

    def __init__(self, batch_size):
        self.batch_size = batch_size
        self.train_features = None
        self.train_labels = None
        self.val_features = None
        self.val_labels = None
        self.test_features = None
        self.test_labels = None
        self.logger = logging.getLogger(__name__)

    def create_tensor(self):
        """
        Load the preprocessed data and convert it to tensors
        """
        try:
            self.train_features = torch.tensor(np.load(TRAIN_FEATURES_PATH)).float()
            self.train_labels = torch.tensor(np.load(TRAIN_LABELS_PATH)).float()
            self.val_features = torch.tensor(np.load(VAL_FEATURES_PATH)).float()
            self.val_labels = torch.tensor(np.load(VAL_LABELS_PATH)).float()
            self.test_features = torch.tensor(np.load(TEST_FEATURES_PATH)).float()
            self.test_labels = torch.tensor(np.load(TEST_LABELS_PATH)).float()
        except FileNotFoundError:
            print("Preprocessed data not found. Please run the preprocessing script first.")

    def load_data(self):
        """
        Create data loaders for training, validation, and testing
        """
        self.create_tensor()

        # Shuffle only the training windows; validation and test data stay in
        # chronological order so window-level scores line up with the true labels.
        train_loader = DataLoader(self.train_features, batch_size=self.batch_size, shuffle=True)
        val_loader = DataLoader(self.val_features, batch_size=self.batch_size, shuffle=False)
        test_loader = DataLoader(self.test_features, batch_size=self.batch_size, shuffle=False)

        print("Data loaded successfully.")

        return train_loader, val_loader, test_loader

    def get_true_anomalies(self):
        """
        Get the true anomalies from the test data
        """
        return self.test_labels
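A minimal usage sketch for the loader, assuming the preprocessed .npy files already exist at the paths defined in path_config:

# Sketch: load the preprocessed windows and inspect one batch.
# Assumes preprocesser.py has already written the .npy files.
from pipeline import NYCDataLoader

loader = NYCDataLoader(batch_size=32)
train_loader, val_loader, test_loader = loader.load_data()

batch = next(iter(train_loader))
print(batch.shape)  # (32, window_size, n_features), e.g. (32, 24, 1)

labels = loader.get_true_anomalies()  # per-timestamp 0/1 labels for the test split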
src/pipeline/model.py
ADDED
@@ -0,0 +1,132 @@
# Class to define the network architecture of the models
import torch
import torch.nn as nn


class VanillaLSTM(nn.Module):
    def __init__(
        self, input_dim=1, hidden_dim=64, output_dim=1, num_layers=2, dropout=0.2
    ):
        super(VanillaLSTM, self).__init__()
        self.hidden_dim = hidden_dim
        self.num_layers = num_layers

        self.lstm = nn.LSTM(
            input_size=input_dim,
            hidden_size=hidden_dim,
            num_layers=num_layers,
            batch_first=True,
            dropout=dropout,
        )
        self.fc = nn.Linear(in_features=hidden_dim, out_features=output_dim)

    def forward(self, x):
        # Zero initial states, created on the same device as the input
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_dim, device=x.device)
        c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_dim, device=x.device)

        out, (hn, cn) = self.lstm(x, (h0, c0))
        out = self.fc(out[:, -1, :])  # predict from the last timestep's hidden state

        return out


class VAE(nn.Module):

    def __init__(self, seq_len=48, n_features=1, hidden_dim=64, latent_dim=16, dropout=0.3):
        super(VAE, self).__init__()
        self.seq_len = seq_len
        self.hidden_dim = hidden_dim

        # Encoder
        self.enc_lstm = nn.LSTM(
            input_size=n_features,
            hidden_size=hidden_dim,
            batch_first=True
        )
        self.enc_dropout = nn.Dropout(p=dropout)
        self.fc_mu = nn.Linear(hidden_dim, latent_dim)
        self.fc_var = nn.Linear(hidden_dim, latent_dim)

        # Decoder
        self.fc_upsample = nn.Linear(latent_dim, seq_len * hidden_dim)
        self.dec_dropout = nn.Dropout(p=dropout)
        self.dec_lstm = nn.LSTM(
            input_size=hidden_dim,
            hidden_size=hidden_dim,
            batch_first=True
        )
        self.fc_out = nn.Linear(hidden_dim, n_features)

    def reparameterize(self, mu, log_var):
        std = torch.exp(0.5 * log_var)
        eps = torch.randn_like(std)
        return mu + eps * std

    def forward(self, x):
        # Encode
        _, (h_enc, c_enc) = self.enc_lstm(x)
        h_enc = h_enc.squeeze(0)  # shape: (batch_size, hidden_dim)
        h_enc = self.enc_dropout(h_enc)
        mu, log_var = self.fc_mu(h_enc), self.fc_var(h_enc)

        # Reparameterize at latent space
        z = self.reparameterize(mu, log_var)

        # Decode
        z = self.fc_upsample(z)
        z = z.view(-1, self.seq_len, self.hidden_dim)
        decoded, _ = self.dec_lstm(z)
        dec_out = self.dec_dropout(decoded)
        out = self.fc_out(dec_out)

        return out, mu, log_var


class Transformer(nn.Module):

    def __init__(self, input_dim=1, model_dim=64, num_layers=2, num_heads=4, dropout=0.2):
        super(Transformer, self).__init__()
        self.model_dim = model_dim
        self.num_layers = num_layers

        self.embedding = nn.Linear(input_dim, model_dim)

        encoder_layer = nn.TransformerEncoderLayer(
            d_model=model_dim,
            nhead=num_heads,
            dropout=dropout,
            dim_feedforward=2 * model_dim,  # 128
            batch_first=True
        )
        encoder_norm = nn.LayerNorm(model_dim)

        self.transformer_encoder = nn.TransformerEncoder(
            encoder_layer,
            num_layers=num_layers,
            norm=encoder_norm
        )

        decoder_layer = nn.TransformerDecoderLayer(
            d_model=model_dim,
            nhead=num_heads,
            dropout=dropout,
            dim_feedforward=2 * model_dim,  # 128
            batch_first=True
        )
        decoder_norm = nn.LayerNorm(model_dim)

        self.transformer_decoder = nn.TransformerDecoder(
            decoder_layer,
            num_layers=num_layers,
            norm=decoder_norm
        )
        self.output = nn.Linear(model_dim, input_dim)

    def forward(self, x):
        embed_x = self.embedding(x)
        enc_out = self.transformer_encoder(embed_x)
        # Decode the embedded input against the encoder memory to reconstruct x
        dec_out = self.transformer_decoder(embed_x, enc_out)
        out = self.output(dec_out)
        return out
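A quick shape check for the three architectures; a sketch, where the window length of 24 matches seq_length in src/train.py:

# Sketch: verify input/output shapes with a dummy batch of windows.
import torch
from pipeline import VanillaLSTM, VAE, Transformer

x = torch.randn(8, 24, 1)  # (batch, window_size, n_features)

lstm = VanillaLSTM()
print(lstm(x[:, :-1, :]).shape)  # torch.Size([8, 1]): next-step prediction

vae = VAE(seq_len=24)
recon, mu, log_var = vae(x)
print(recon.shape, mu.shape)     # torch.Size([8, 24, 1]), torch.Size([8, 16])

transformer = Transformer()
print(transformer(x).shape)      # torch.Size([8, 24, 1]): full-window reconstruction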
src/pipeline/preprocesser.py
ADDED
@@ -0,0 +1,223 @@
# Class for preprocessing the data before the training phase
import os
import logging

import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from adtk.data import validate_series

from path_config import DATA_DIR


class Preprocessor:

    def __init__(self):
        self.scaler = StandardScaler()
        self.raw_data = None
        self.anomalies_events = None
        self.logger = logging.getLogger(__name__)
        self.window_size = None

    def preprocess_data(self, file_path, val_split="2014-10-01", test_split="2014-10-16", window_size=48):
        """
        Preprocess the raw data

        :param file_path: Path to the CSV file
        :param val_split: Date where the validation split starts
        :param test_split: Date where the test split starts
        :param window_size: The size of the sliding window, default is 48
        """
        # Load the raw data
        self.logger.info("Loading raw data...")
        self.load_raw_data(file_path)

        # Load the known anomaly events
        self.logger.info("Detecting anomalies...")
        self.load_anomalies_events()

        # Label the anomalies in the raw data
        self.logger.info("Labeling anomalies...")
        self._label_anomalies()

        # Split the data chronologically
        self.logger.info("Splitting the data into training, validation and testing set...")
        train_data, val_data, test_data = self._chronological_split(val_split=val_split, test_split=test_split)

        # Split the data into features and target
        self.logger.info("Splitting the data into features and target...")
        X_train, y_train = self._split_features_target(train_data)
        X_val, y_val = self._split_features_target(val_data)
        X_test, y_test = self._split_features_target(test_data)

        # Scale the data
        self.logger.info("Scaling the data...")
        train_scaled, val_scaled, test_scaled = self._scale_data(X_train, X_val, X_test)

        # Create a sliding window of data
        self.logger.info(f"Creating sliding window with the length of {window_size} from the data...")
        train_sequences = self._create_sliding_window(train_scaled, window_size=window_size)
        val_sequences = self._create_sliding_window(val_scaled, window_size=window_size)
        test_sequences = self._create_sliding_window(test_scaled, window_size=window_size)

        self.window_size = window_size

        # Save the preprocessed data
        self.logger.info("Saving the preprocessed data...")
        self.save_preprocessed_data(train_sequences, "train_features.npy")
        self.save_preprocessed_data(val_sequences, "val_features.npy")
        self.save_preprocessed_data(test_sequences, "test_features.npy")
        self.save_preprocessed_data(y_train.values, "train_labels.npy")
        self.save_preprocessed_data(y_val.values, "val_labels.npy")
        self.save_preprocessed_data(y_test.values, "test_labels.npy")

        print("Preprocessing completed!")

    def load_raw_data(self, file_path):
        """
        Load raw data from a CSV file

        :param file_path: Path to the CSV file
        """
        try:
            df = pd.read_csv(
                file_path,
                usecols=["timestamp", "value"],
                index_col="timestamp",
                parse_dates=True,
            )
            df.sort_index(inplace=True)

            # Rename the columns
            df.rename(columns={"value": "Traffic"}, inplace=True)
            df.index.rename("Timestamp", inplace=True)

            # Validate the time series
            self.raw_data = validate_series(df)

        except FileNotFoundError:
            print(f"File path does not exist: {file_path}")

    def load_anomalies_events(self):
        """
        Load the known anomaly events
        """
        if self.raw_data is None:
            print("Raw data is not loaded")
            return

        events = [
            ('2014-07-04 00:00:00', '2014-07-06 23:59:59'),  # Independence Day Celebration
            ('2014-09-01 00:00:00', '2014-09-01 23:59:59'),  # Labour Day
            ('2014-11-02 00:00:00', '2014-11-02 11:59:59'),  # NYC Marathon 2014
            ('2014-11-27 00:00:00', '2014-11-27 23:59:59'),  # Thanksgiving Day
            ('2014-12-25 00:00:00', '2014-12-26 23:59:59'),  # Christmas Holiday
            ('2015-01-01 00:00:00', '2015-01-01 23:59:59'),  # New Year
            ('2015-01-26 12:00:00', '2015-01-28 11:59:59')   # Snowstorm
        ]

        # Store the events
        self.anomalies_events = events

    def _label_anomalies(self):
        """
        Label the anomalies in the raw data
        """
        if self.raw_data is None:
            print("Raw data is not loaded")
            return

        if self.anomalies_events is None:
            print("Anomalies are not detected")
            return

        # Label anomalous timestamps as 1 and normal ones as 0
        self.raw_data["Anomaly"] = 0
        for start, end in self.anomalies_events:
            self.raw_data.loc[start:end, "Anomaly"] = 1

    def _chronological_split(self, val_split="2014-10-01", test_split="2014-10-16"):
        """
        Split the data chronologically into train, validation, and test sets

        :param val_split: Validation split date
        :param test_split: Test split date
        """
        if self.raw_data is None:
            print("Raw data is not loaded")
            return

        # Split the data
        train_data = self.raw_data.loc[self.raw_data.index < val_split]
        val_data = self.raw_data.loc[
            (self.raw_data.index >= val_split) & (self.raw_data.index < test_split)
        ]
        test_data = self.raw_data.loc[self.raw_data.index >= test_split]

        return train_data, val_data, test_data

    def _split_features_target(self, data, target_col="Anomaly"):
        """
        Split the data into features and target

        :param data: DataFrame containing the data
        :param target_col: Column to predict
        """
        X = data.drop(columns=[target_col])
        y = data[target_col]

        return X, y

    def _scale_data(self, train_data, val_data, test_data):
        """
        Scale the data using StandardScaler

        :param train_data: Training data
        :param val_data: Validation data
        :param test_data: Test data
        """
        if self.scaler is None:
            self.scaler = StandardScaler()

        # Fit on the training data only, then transform all three splits
        train_scaled = self.scaler.fit_transform(train_data)
        val_scaled = self.scaler.transform(val_data)
        test_scaled = self.scaler.transform(test_data)

        return train_scaled, val_scaled, test_scaled

    def _create_sliding_window(self, data, window_size=48, step_size=1):
        """
        Create a sliding window of data

        :param data: Scaled data
        :param window_size: Size of the window
        :param step_size: Step size for the window
        """
        sequences = []

        for i in range(0, len(data) - window_size + 1, step_size):
            sequences.append(data[i : i + window_size])

        return np.array(sequences)

    def save_preprocessed_data(self, data, file_path):
        """
        Save preprocessed data to a .npy file

        :param data: Preprocessed data
        :param file_path: Path to save the .npy file
        """
        dir_path = os.path.join(DATA_DIR, "preprocessed_data")

        if not os.path.exists(dir_path):
            os.makedirs(dir_path)

        file_path = os.path.join(dir_path, file_path)

        np.save(file_path, data)
        self.logger.info(f"{file_path} has been saved successfully!")

    def get_seq_length(self):
        """
        Get the length of the sequence
        """
        return self.window_size
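The sliding window step turns a series of length N into N - window_size + 1 overlapping sequences. A toy example:

# Sketch: sliding windows over a toy series of length 6 with window_size=4
import numpy as np

data = np.arange(6).reshape(-1, 1)  # shape (6, 1), one feature
windows = [data[i : i + 4] for i in range(0, len(data) - 4 + 1)]
print(np.array(windows).shape)  # (3, 4, 1)
# windows[0] covers t=0..3, windows[1] covers t=1..4, windows[2] covers t=2..5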
src/pipeline/trainer.py
ADDED
@@ -0,0 +1,176 @@
import copy
import logging

import torch
import torch.nn as nn
from torch.optim import Adam


# Class for model training and evaluation
class Trainer:

    def __init__(self):
        self.logger = logging.getLogger(__name__)
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.batch_size = None
        self.model = None
        self.model_type = None
        self.optimizer = None
        self.criterion = None
        self.train_loader = None
        self.val_loader = None
        self.test_loader = None
        self.n_epochs = None
        self.train_history = {'train_loss': [], 'val_loss': []}
        self.best_model = None
        self.best_val_loss = float('inf')

    def init_model(self, model, model_type):
        """
        Initialize the model, optimizer and loss function

        :param model: The model architecture
        :param model_type: The type of the model
        """
        self.logger.info("Initialize the model...")

        self.model = model.to(self.device)
        if model_type not in ["lstm", "vae", "transformer"]:
            raise ValueError("Model type not supported")
        self.model_type = model_type

    def config_train(self, batch_size=32, n_epochs=20, lr=0.001):
        """
        Configure the training parameters

        :param batch_size: The batch size, default is 32
        :param n_epochs: The number of epochs, default is 20
        :param lr: The learning rate, default is 0.001
        """
        self.logger.info("Configure the training parameters...")

        self.batch_size = batch_size
        self.n_epochs = n_epochs

        self.optimizer = Adam(self.model.parameters(), lr=lr)
        self.criterion = nn.MSELoss()

    def train(self, train_loader, val_loader):
        """
        Train the model

        :param train_loader: The training data loader
        :param val_loader: The validation data loader
        """
        print("Training the model...")
        self.logger.info("Start training...")

        self.train_loader = train_loader
        self.val_loader = val_loader

        self.best_val_loss = float('inf')
        self.best_model = None

        for epoch in range(self.n_epochs):
            train_loss = self._train_epoch()
            val_loss = self._val_epoch()

            self.train_history['train_loss'].append(train_loss)
            self.train_history['val_loss'].append(val_loss)

            self.logger.info(f"Epoch {epoch + 1}/{self.n_epochs}, Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}")

        self.logger.info("Training completed!")

        print("Training completed!")

        return self.best_model, self.train_history

    def _train_epoch(self):
        """
        Train the model for one epoch
        """
        self.model.train()
        train_loss = 0

        for seq in self.train_loader:

            self.optimizer.zero_grad()

            if self.model_type == "lstm":
                X_train = seq[:, :-1, :]  # All timesteps except the last one
                y_train = seq[:, -1, :]   # Final timestep

                X_train = X_train.to(self.device)
                y_train = y_train.to(self.device)

                output = self.model(X_train)
                loss = self.criterion(output, y_train)

            elif self.model_type == "vae":
                X = seq.to(self.device)
                recon_X, mu, logvar = self.model(X)
                recon_loss = self.criterion(recon_X, X)
                # KL divergence between the approximate posterior and a standard normal prior
                kl_div = -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp()) / X.size(0)
                loss = recon_loss + 0.2 * kl_div

            elif self.model_type == "transformer":
                X = seq.to(self.device)

                recon_X = self.model(X)
                loss = self.criterion(recon_X, X)
            else:
                raise ValueError("Model type not supported")

            loss.backward()
            self.optimizer.step()

            train_loss += loss.item()

        return train_loss / len(self.train_loader)

    def _val_epoch(self):
        """
        Validate the model for one epoch
        """
        self.model.eval()
        val_loss = 0

        with torch.no_grad():
            for seq in self.val_loader:

                if self.model_type == "lstm":
                    X_val = seq[:, :-1, :]
                    y_val = seq[:, -1, :]

                    X_val = X_val.to(self.device)
                    y_val = y_val.to(self.device)

                    output = self.model(X_val)
                    loss = self.criterion(output, y_val)

                elif self.model_type == "vae":
                    X = seq.to(self.device)
                    recon_X, mu, logvar = self.model(X)
                    recon_loss = self.criterion(recon_X, X)
                    kl_div = -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp()) / X.size(0)
                    loss = recon_loss + 0.2 * kl_div

                elif self.model_type == "transformer":
                    X_val = seq.to(self.device)

                    recon_X = self.model(X_val)
                    loss = self.criterion(recon_X, X_val)

                else:
                    raise ValueError("Model type not supported")

                val_loss += loss.item()

        # Snapshot the best model by total validation loss over the epoch
        if val_loss < self.best_val_loss:
            self.best_model = copy.deepcopy(self.model)
            self.best_val_loss = val_loss

        return val_loss / len(self.val_loader)
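Once trained, reconstruction-style models like these are typically scored by per-window reconstruction error and flagged against a threshold. A sketch of that idea, which is not the repo's src/inference.py and whose exact logic may differ:

# Sketch: window-level anomaly scores from reconstruction error.
# Thresholding at a high percentile is one common heuristic; the shipped
# inference code may use a different rule.
import numpy as np
import torch

def score_windows(model, loader, device):
    model.eval()
    scores = []
    with torch.no_grad():
        for seq in loader:
            X = seq.to(device)
            recon = model(X)  # for the Transformer; the VAE also returns mu/log_var
            err = ((recon - X) ** 2).mean(dim=(1, 2))  # mean squared error per window
            scores.append(err.cpu().numpy())
    scores = np.concatenate(scores)
    threshold = np.percentile(scores, 95)
    return scores, scores > threshold  # boolean anomaly flags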
src/pipeline/utils.py
ADDED
@@ -0,0 +1,16 @@
import os
import logging

import torch

from path_config import MODEL_DIR


def save_model(model, model_name):
    """
    Save the trained model
    """
    os.makedirs(MODEL_DIR, exist_ok=True)

    model_path = os.path.join(MODEL_DIR, model_name)
    torch.save(model.state_dict(), model_path)
    logging.info(f"Model saved at {model_path}")
    print("Saved successfully!")
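Since save_model stores only a state_dict, reloading requires re-instantiating the architecture first. A sketch:

# Sketch: reload a saved state_dict for inference.
# The architecture hyperparameters must match those used at training time.
import os
import torch
from pipeline import VanillaLSTM
from path_config import MODEL_DIR

model = VanillaLSTM()  # same defaults as in src/train.py
state = torch.load(os.path.join(MODEL_DIR, "lstm_model_small.pth"), map_location="cpu")
model.load_state_dict(state)
model.eval()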
src/train.py
ADDED
@@ -0,0 +1,44 @@
from src.config.config import setup_logging
from pipeline import Preprocessor, NYCDataLoader, Trainer, VanillaLSTM, Transformer, VAE, save_model
from path_config import RAW_DATA_PATH


def train():

    seq_length = 24

    setup_logging()

    # Preprocess the data
    preprocessor = Preprocessor()
    preprocessor.preprocess_data(file_path=RAW_DATA_PATH, window_size=seq_length)

    # Load the preprocessed data
    data_loader = NYCDataLoader(batch_size=32)
    train_loader, val_loader, test_loader = data_loader.load_data()

    # Initialize the Trainer
    trainer = Trainer()

    # Train Vanilla LSTM model
    trainer.init_model(model=VanillaLSTM(), model_type="lstm")
    trainer.config_train(batch_size=32, n_epochs=20, lr=0.001)
    lstm_model, lstm_history = trainer.train(train_loader=train_loader, val_loader=val_loader)

    # Train VAE model
    trainer.init_model(model=VAE(seq_len=seq_length), model_type="vae")
    trainer.config_train(batch_size=32, n_epochs=20, lr=0.001)
    vae_model, vae_history = trainer.train(train_loader=train_loader, val_loader=val_loader)

    # Train Transformer model
    trainer.init_model(model=Transformer(), model_type="transformer")
    trainer.config_train(batch_size=32, n_epochs=5, lr=0.001)
    transformer_model, transformer_history = trainer.train(train_loader=train_loader, val_loader=val_loader)

    # Save the models
    save_model(lstm_model, "lstm_model_small.pth")
    save_model(vae_model, "vae_model_small.pth")
    save_model(transformer_model, "transformer_model_small.pth")


if __name__ == '__main__':
    train()