# Spaces status: Runtime error
import streamlit as st | |
import numpy as np | |
import joblib | |
from sklearn.preprocessing import StandardScaler | |
import pandas as pd | |
import matplotlib.pyplot as plt | |
import seaborn as sns | |
# Artifact locations; relative paths are resolved against the working directory.
non_anomaly_csv_filename = 'non_anomaly_data.csv'
model_filename = "IsolationForest.joblib"
scaler_filename = "StandardScaler.joblib"

# Reference rows used later as the "non-anomaly" baseline for scoring and for
# the location scatter plot.
non_anomaly_df = pd.read_csv(non_anomaly_csv_filename)

# NOTE: joblib.load unpickles the files, so only trusted artifacts must be
# placed next to the app.
# Fitted anomaly detector (presumably an sklearn IsolationForest -- confirm).
isolation_forest = joblib.load(model_filename)
# Fitted feature scaler applied to every row before it is scored.
scaler = joblib.load(scaler_filename)
# Page heading, followed by the sidebar form that collects one transaction's
# feature values. No explicit `value` is passed to the sliders, so each one
# starts at the library default.
st.title("Anomaly Detection App with Isolation Forest")

sidebar = st.sidebar
sidebar.title("Input Feature Values")

# Monetary and geographic features (float-valued bounds).
transaction_dollar_amount = sidebar.slider(
    "Transaction Dollar Amount",
    min_value=0.0,
    max_value=10000.0,
)
longitude = sidebar.slider(
    "Longitude (Long)",
    min_value=-180.0,
    max_value=180.0,
)
latitude = sidebar.slider(
    "Latitude (Lat)",
    min_value=-90.0,
    max_value=90.0,
)

# Account limit and calendar features (integer-valued bounds).
credit_card_limit = sidebar.slider(
    "Credit Card Limit",
    min_value=0,
    max_value=50000,
)
year = sidebar.slider("Year", min_value=2000, max_value=2030)
month = sidebar.slider("Month", min_value=1, max_value=12)
day = sidebar.slider("Day", min_value=1, max_value=31)

# True only on the script run triggered by clicking the button.
submitted = sidebar.button("Submit")
if submitted:
    # Score the submitted transaction, classify it, and render two plots:
    # the score distribution of the non-anomaly data and a location scatter.
    #
    # NOTE(review): several non-ASCII sequences in the label strings below
    # (e.g. after "Prediction Result:") look like emoji that were decoded with
    # the wrong codec. They are reproduced exactly as found because the
    # intended characters cannot be recovered with certainty; restore them
    # from the original file if it is available.

    # One-row frame holding the sidebar values. The column names and their
    # order are presumably the ones the scaler/model were fitted on -- confirm.
    input_data = {
        'transaction_dollar_amount': transaction_dollar_amount,
        'Long': longitude,
        'Lat': latitude,
        'credit_card_limit': credit_card_limit,
        'year': year,
        'month': month,
        'day': day,
    }
    selected_columns = pd.DataFrame([input_data])
    feature_names = list(selected_columns.columns)

    # Score the reference rows on the same columns, in the same order, as the
    # input row so that extra or reordered CSV columns cannot break the
    # scaler. Fall back to the whole frame when the CSV lacks these names.
    if set(feature_names).issubset(non_anomaly_df.columns):
        reference_features = non_anomaly_df[feature_names]
    else:
        reference_features = non_anomaly_df

    # Standardize with the loaded scaler, then score with the loaded model.
    selected_columns_scaled = scaler.transform(selected_columns)
    non_anomaly_scores = isolation_forest.decision_function(
        scaler.transform(reference_features)
    )
    your_anomaly_score = isolation_forest.decision_function(selected_columns_scaled)[0]

    # Range of the reference scores, reported for context only.
    min_non_anomaly_score = np.min(non_anomaly_scores)
    max_non_anomaly_score = np.max(non_anomaly_scores)

    # decision_function scores: the lower, the more abnormal; negative values
    # are outliers and positive values are inliers. The previous rule padded
    # the reference min/max by 0.5 -- about half of the whole score range --
    # and also flagged scores *above* the maximum, so it practically never
    # fired and treated the most normal points as suspicious. Use the model's
    # own decision boundary instead.
    anomaly_threshold = 0.0
    is_anomaly = bool(your_anomaly_score < anomaly_threshold)

    st.subheader("Anomaly Classification")
    if is_anomaly:
        st.write("Prediction Result: π¨ Anomaly Detected!")
    else:
        st.write("Prediction Result: β Not Anomaly")

    # Histogram of reference scores with the submitted point's score marked.
    # An explicit Figure is used rather than the global pyplot state, and it
    # is closed after rendering so figures do not pile up across reruns.
    score_fig, score_ax = plt.subplots(figsize=(8, 5))
    sns.histplot(
        non_anomaly_scores,
        kde=True,
        color='gray',
        label='Non-Anomaly Score Distribution',
        ax=score_ax,
    )
    score_ax.axvline(
        x=your_anomaly_score,
        color='blue',
        linestyle='dashed',
        label='Your Data Point',
    )
    score_ax.set_xlabel('Anomaly Score')
    score_ax.set_ylabel('Frequency')
    score_ax.set_title('Anomaly Score Distribution and Your Data Point')
    score_ax.legend()
    st.pyplot(score_fig)
    plt.close(score_fig)

    # Explain the result in words.
    st.write("The input data point has been classified as an anomaly." if is_anomaly
             else "The input data point is not classified as an anomaly.")
    st.write("The anomaly score is:", your_anomaly_score)
    st.write("The threshold for anomaly detection is:", anomaly_threshold)
    st.write("Scores of the non-anomaly data range from",
             min_non_anomaly_score, "to", max_non_anomaly_score)

    # Location scatter: reference rows in grey, the submitted point on top.
    fig, ax = plt.subplots(figsize=(10, 8))
    sns.scatterplot(data=non_anomaly_df, x='Long', y='Lat', color='lightgrey', label='Normal ποΈ', ax=ax)
    if is_anomaly:
        ax.scatter(selected_columns['Long'], selected_columns['Lat'], color='red', label='Suspicious π©', s=100, marker='x')
    else:
        ax.scatter(selected_columns['Long'], selected_columns['Lat'], color='green', label='Valid β ', s=100, marker='o')
    ax.set_xlabel("Longitude")
    ax.set_ylabel("Latitude")
    ax.set_title("Location Plot: Anomaly Detection πΊοΈ")
    ax.legend()
    ax.grid(True)

    st.subheader("Location Plot: Anomaly Detection πΊοΈ")
    st.pyplot(fig)
    plt.close(fig)

    # Describe the marker; the classification itself is already shown above,
    # so it is not repeated here.
    st.write("The location plot visualizes the anomaly detection result based on longitude and latitude.")
    if is_anomaly:
        st.write("The input data point is marked as Suspicious π© due to its anomaly score.")
        st.write("The red 'x' marker indicates a suspicious location.")
    else:
        st.write("The input data point is marked as Valid β due to its anomaly score.")
        st.write("The green 'o' marker indicates a valid location.")