import numpy as np
from tensorflow.keras.models import load_model
import joblib

class RTUAnomalizer:
    """Anomaly detection pipeline combining a Keras prediction model with
    k-means clustering models applied to the prediction residuals."""

    def __init__(self, prediction_model_path=None, clustering_model_paths=None, num_inputs=None, num_outputs=None):
        # Keep model state per instance (mutable class-level defaults would be shared across instances).
        self.model = None
        self.kmeans_models = []
        self.num_inputs = num_inputs
        self.num_outputs = num_outputs
        if prediction_model_path is not None and clustering_model_paths is not None:
            self.load_models(prediction_model_path, clustering_model_paths)

    def initialize_lists(self, size=30):
        # Create three zero-filled rolling buffers for actual, predicted, and residual values.
        initial_values = [0] * size
        return initial_values.copy(), initial_values.copy(), initial_values.copy()

    def load_models(self, prediction_model_path, clustering_model_paths):
        # Load the Keras prediction model and one k-means model per residual group.
        self.model = load_model(prediction_model_path)
        for path in clustering_model_paths:
            self.kmeans_models.append(joblib.load(path))
            
    def predict(self, df_new):
        return self.model.predict(df_new)

    def calculate_residuals(self, df_trans, pred):
        # Row 30 of the transformed window is treated as the actual values for the predicted columns.
        actual = df_trans[30, :self.num_outputs + 1]
        resid = actual - pred
        return actual, resid

    def resize_prediction(self, pred, df_trans):
        # Append the non-predicted feature columns from row 30 so the prediction
        # matches the scaler's expected width (avoids in-place ndarray.resize,
        # which fails when other references to the array exist).
        extra = df_trans[30, self.num_outputs + 1:]
        pred = np.hstack([pred, np.tile(extra, (pred.shape[0], 1))])
        return pred

    def inverse_transform(self, scaler, pred, df_trans):
        # Undo the feature scaling so predictions and actuals are back in original units.
        pred = scaler.inverse_transform(np.array(pred))
        actual = scaler.inverse_transform(np.array([df_trans[30, :]]))
        return actual, pred

    def update_lists(self, actual_list, pred_list, resid_list, actual, pred, resid):
        # Slide the rolling windows: drop the oldest entry and append the newest value from column 1.
        actual_list.pop(0)
        pred_list.pop(0)
        resid_list.pop(0)
        actual_list.append(actual[0, 1])
        pred_list.append(pred[0, 1])
        resid_list.append(resid[0, 1])
        return actual_list, pred_list, resid_list

    def calculate_distances(self, resid):
        # For each k-means model, take its 7-column residual block (columns 1-7, 8-14, ...)
        # and compute the Euclidean distance to that model's first cluster centre.
        dist = []
        for i, model in enumerate(self.kmeans_models):
            dist.append(np.linalg.norm(resid[:, (i * 7) + 1:(i * 7) + 8] - model.cluster_centers_[0], ord=2, axis=1))
        return np.array(dist)

    def pipeline(self, df_new, df_trans, scaler):
        # End-to-end step: predict, compute residuals, restore original units,
        # update the rolling buffers, and measure distances to the cluster centres.
        actual_list, pred_list, resid_list = self.initialize_lists()
        pred = self.predict(df_new)
        actual, resid = self.calculate_residuals(df_trans, pred)
        pred = self.resize_prediction(pred, df_trans)
        actual, pred = self.inverse_transform(scaler, pred, df_trans)
        actual_list, pred_list, resid_list = self.update_lists(actual_list, pred_list, resid_list, actual, pred, resid)
        dist = self.calculate_distances(resid)
        return actual_list, pred_list, resid_list, dist
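

# --- Usage sketch (illustrative only, not part of the original module) -------
# A minimal, hypothetical example of driving RTUAnomalizer end to end. It
# assumes the predictor is a Keras model saved to HDF5 and that the k-means
# models and scaler are scikit-learn objects saved with joblib. The file names
# and dimensions below (20 features, 15 predicted columns, two 7-column
# residual groups, 30-step lookback) are placeholders chosen only to be
# mutually consistent with the indexing used in the class above.
if __name__ == "__main__":
    from sklearn.cluster import KMeans
    from sklearn.preprocessing import MinMaxScaler
    from tensorflow.keras import layers, models

    num_features, num_outputs, lookback = 20, 14, 30

    # Tiny stand-in predictor: a 30-step window of 20 features -> 15 values.
    predictor = models.Sequential([
        layers.Input(shape=(lookback, num_features)),
        layers.Flatten(),
        layers.Dense(num_outputs + 1),
    ])
    predictor.save("predictor.h5")

    # Two stand-in k-means models, each clustering a 7-column residual block.
    for name in ("kmeans_a.pkl", "kmeans_b.pkl"):
        joblib.dump(KMeans(n_clusters=2, n_init=10).fit(np.random.rand(50, 7)), name)

    scaler = MinMaxScaler().fit(np.random.rand(100, num_features))

    anomalizer = RTUAnomalizer(
        prediction_model_path="predictor.h5",
        clustering_model_paths=["kmeans_a.pkl", "kmeans_b.pkl"],
        num_inputs=num_features,
        num_outputs=num_outputs,
    )

    # One scaled window: rows 0-29 feed the model, row 30 holds the target values.
    df_trans = scaler.transform(np.random.rand(lookback + 1, num_features))
    df_new = df_trans[np.newaxis, :lookback, :]

    actual_list, pred_list, resid_list, dist = anomalizer.pipeline(df_new, df_trans, scaler)
    print("latest residual:", resid_list[-1])
    print("cluster distances:", dist.ravel())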