File size: 5,929 Bytes
5b475df
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
import streamlit as st
import numpy as np
import joblib
from sklearn.preprocessing import StandardScaler
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# ---------------------------------------------------------------------------
# Artifacts and page setup.
#
# Three files are read from the working directory: the reference
# ("non-anomaly") CSV, the fitted Isolation Forest and the fitted
# StandardScaler.
# NOTE(review): Streamlit re-executes the script on each interaction, so these
# loads presumably repeat on every rerun - consider caching if that is slow.
# NOTE(review): joblib files are pickle-based; only load artifacts that ship
# with the app and come from a trusted source.
# ---------------------------------------------------------------------------
non_anomaly_csv_filename = 'non_anomaly_data.csv'
model_filename = "IsolationForest.joblib"
scaler_filename = "StandardScaler.joblib"

non_anomaly_df = pd.read_csv(non_anomaly_csv_filename)
isolation_forest = joblib.load(model_filename)
scaler = joblib.load(scaler_filename)

st.title("Anomaly Detection App with Isolation Forest")

# Sidebar: one slider per model feature, followed by the submit button.
# The float bounds on the first three sliders and the int bounds on the rest
# are kept exactly as given, since they determine the slider value types.
with st.sidebar:
    st.title("Input Feature Values")
    transaction_dollar_amount = st.slider(
        "Transaction Dollar Amount", min_value=0.0, max_value=10000.0
    )
    longitude = st.slider("Longitude (Long)", min_value=-180.0, max_value=180.0)
    latitude = st.slider("Latitude (Lat)", min_value=-90.0, max_value=90.0)
    credit_card_limit = st.slider("Credit Card Limit", min_value=0, max_value=50000)
    year = st.slider("Year", min_value=2000, max_value=2030)
    month = st.slider("Month", min_value=1, max_value=12)
    day = st.slider("Day", min_value=1, max_value=31)

    # Truthy when the button reports a click; gates the scoring section below.
    submitted = st.button("Submit")

# Amount by which the reference score range is widened on each side before a
# score is reported as anomalous.
SCORE_MARGIN = 0.5


def _score_bounds(reference_scores, margin=SCORE_MARGIN):
    """Return the reference score range widened by ``margin`` on both sides.

    Args:
        reference_scores: Non-empty array-like of anomaly scores.
        margin: Amount subtracted from the minimum and added to the maximum.

    Returns:
        A ``(low, high)`` tuple of thresholds.

    Raises:
        ValueError: If ``reference_scores`` is empty (raised by NumPy).
    """
    return np.min(reference_scores) - margin, np.max(reference_scores) + margin


def _show_classification(is_anomaly):
    """Render the "Anomaly Classification" heading and the verdict line."""
    st.subheader("Anomaly Classification")
    if is_anomaly:
        st.write("Prediction Result: 🚨 Anomaly Detected!")
    else:
        st.write("Prediction Result: ✅ Not Anomaly")


if submitted:
    # One-row frame holding the sidebar values, keyed by feature name.
    # Presumably these names/order match what the scaler was fitted on.
    input_data = {
        'transaction_dollar_amount': transaction_dollar_amount,
        'Long': longitude,
        'Lat': latitude,
        'credit_card_limit': credit_card_limit,
        'year': year,
        'month': month,
        'day': day,
    }
    selected_columns = pd.DataFrame([input_data])

    # Scale the input with the loaded StandardScaler, then score it.
    selected_columns_scaled = scaler.transform(selected_columns)
    your_anomaly_score = isolation_forest.decision_function(selected_columns_scaled)[0]

    # Score the whole reference (non-anomaly) dataset the same way.
    non_anomaly_scores = isolation_forest.decision_function(
        scaler.transform(non_anomaly_df)
    )

    # The input is an anomaly when its score falls outside the reference
    # score range widened by SCORE_MARGIN.
    # NOTE(review): per scikit-learn, IsolationForest.decision_function gives
    # lower values to more abnormal samples (negative = outlier). The upper
    # bound therefore flags unusually *normal* scores, and a 0.5 margin may be
    # large relative to the score range. The rule is kept unchanged here;
    # confirm the intended behaviour.
    min_threshold, max_threshold = _score_bounds(non_anomaly_scores)
    is_anomaly = your_anomaly_score < min_threshold or your_anomaly_score > max_threshold

    _show_classification(is_anomaly)

    # Histogram of reference scores with the input's score as a dashed line.
    # An explicit Figure is passed to st.pyplot (instead of the pyplot module)
    # and closed afterwards so figures do not pile up across reruns.
    hist_fig, hist_ax = plt.subplots(figsize=(8, 5))
    sns.histplot(
        non_anomaly_scores,
        kde=True,
        color='gray',
        label='Non-Anomaly Score Distribution',
        ax=hist_ax,
    )
    hist_ax.axvline(x=your_anomaly_score, color='blue', linestyle='dashed', label='Your Data Point')
    hist_ax.set_xlabel('Anomaly Score')
    hist_ax.set_ylabel('Frequency')
    hist_ax.set_title('Anomaly Score Distribution and Your Data Point')
    hist_ax.legend()
    st.pyplot(hist_fig)
    plt.close(hist_fig)

    # Explain the result in words and show the numbers behind it.
    st.write("The input data point has been classified as an anomaly." if is_anomaly
             else "The input data point is not classified as an anomaly.")
    st.write("The anomaly score is:", your_anomaly_score)
    st.write("The threshold for anomaly detection is:", min_threshold, "to", max_threshold)

    # Location scatter: reference points in grey, the input point on top.
    fig, ax = plt.subplots(figsize=(10, 8))
    sns.scatterplot(data=non_anomaly_df, x='Long', y='Lat', color='lightgrey', label='Normal 🏙️', ax=ax)

    # Marker appearance depends on the verdict: red 'x' vs. green 'o'.
    if is_anomaly:
        point_style = {'color': 'red', 'label': 'Suspicious 🚩', 'marker': 'x'}
    else:
        point_style = {'color': 'green', 'label': 'Valid ✅', 'marker': 'o'}
    ax.scatter(selected_columns['Long'], selected_columns['Lat'], s=100, **point_style)

    ax.set_xlabel("Longitude")
    ax.set_ylabel("Latitude")
    ax.set_title("Location Plot: Anomaly Detection 🗺️")
    ax.legend()
    ax.grid(True)

    st.subheader("Location Plot: Anomaly Detection 🗺️")
    st.pyplot(fig)
    plt.close(fig)

    # The verdict is repeated under the map, as in the original layout.
    _show_classification(is_anomaly)

    st.write("The location plot visualizes the anomaly detection result based on longitude and latitude.")
    if is_anomaly:
        st.write("The input data point is marked as Suspicious 🚩 due to its anomaly score.")
        st.write("The red 'x' marker indicates a suspicious location.")
    else:
        st.write("The input data point is marked as Valid ✅ due to its anomaly score.")
        st.write("The green 'o' marker indicates a valid location.")