import streamlit as st
import numpy as np
import joblib
from sklearn.preprocessing import StandardScaler
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
# Load the non-anomaly data
non_anomaly_csv_filename = 'non_anomaly_data.csv'
non_anomaly_df = pd.read_csv(non_anomaly_csv_filename)
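
# NOTE (assumption): non_anomaly_data.csv is expected to contain only the seven
# feature columns used at training time (transaction_dollar_amount, Long, Lat,
# credit_card_limit, year, month, day), in the order the scaler was fit on;
# extra or reordered columns could make scaler.transform below fail or silently mis-scale.
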
# Load the Isolation Forest model
model_filename = "IsolationForest.joblib"
isolation_forest = joblib.load(model_filename)
# Load the StandardScaler
scaler_filename = "StandardScaler.joblib"
scaler = joblib.load(scaler_filename)
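
# NOTE (assumption): both .joblib artifacts were saved with a scikit-learn version
# compatible with the one installed here, and the scaler was fit on the same seven
# features as the model. A lightweight, optional sanity check:
if getattr(scaler, "n_features_in_", 7) != 7:
    st.warning("StandardScaler appears to have been fit on a different number of features than this app expects.")
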
st.title("Anomaly Detection App with Isolation Forest")
st.sidebar.title("Input Feature Values")
transaction_dollar_amount = st.sidebar.slider("Transaction Dollar Amount", min_value=0.0, max_value=10000.0)
longitude = st.sidebar.slider("Longitude (Long)", min_value=-180.0, max_value=180.0)
latitude = st.sidebar.slider("Latitude (Lat)", min_value=-90.0, max_value=90.0)
credit_card_limit = st.sidebar.slider("Credit Card Limit", min_value=0, max_value=50000)
year = st.sidebar.slider("Year", min_value=2000, max_value=2030)
month = st.sidebar.slider("Month", min_value=1, max_value=12)
day = st.sidebar.slider("Day", min_value=1, max_value=31)
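# Note: Streamlit sliders default to min_value when no explicit value is given,
# so untouched sliders submit their minimum (e.g. a $0.00 transaction amount).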
submitted = st.sidebar.button("Submit")
if submitted:
    input_data = {
        'transaction_dollar_amount': transaction_dollar_amount,
        'Long': longitude,
        'Lat': latitude,
        'credit_card_limit': credit_card_limit,
        'year': year,
        'month': month,
        'day': day
    }
    selected_columns = pd.DataFrame([input_data])

    # Standardize the input data using the loaded StandardScaler
    selected_columns_scaled = scaler.transform(selected_columns)

    # Score the non-anomaly reference dataset with the Isolation Forest
    non_anomaly_scores = isolation_forest.decision_function(scaler.transform(non_anomaly_df))

    # Score the single input data point
    your_anomaly_score = isolation_forest.decision_function(selected_columns_scaled)[0]
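    # Note: for scikit-learn's IsolationForest, decision_function returns higher
    # scores for more "normal" points; scores below 0 are what the model's own
    # predict() would label as outliers.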

    # Calculate the minimum and maximum anomaly scores observed on the non-anomaly data
    min_non_anomaly_score = np.min(non_anomaly_scores)
    max_non_anomaly_score = np.max(non_anomaly_scores)

    # Add a margin of error around that range
    margin = 0.5
    min_threshold = min_non_anomaly_score - margin
    max_threshold = max_non_anomaly_score + margin

    # Flag the input as an anomaly if its score falls outside the widened range
    # (an alternative rule would compare against a percentile of the reference
    # scores, e.g. np.percentile(non_anomaly_scores, 95))
    is_anomaly = your_anomaly_score < min_threshold or your_anomaly_score > max_threshold
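    # This band check is a heuristic specific to this app: it flags scores that fall
    # outside the score range seen on non-anomalous data (widened by `margin`),
    # rather than relying on the model's built-in threshold of 0.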

    # Print the anomaly status
    st.subheader("Anomaly Classification")
    if is_anomaly:
        st.write("Prediction Result: 🚨 Anomaly Detected!")
    else:
        st.write("Prediction Result: ✅ Not Anomaly")

    # Create a histogram to visualize the anomaly score distribution and the input point's score
    hist_fig, hist_ax = plt.subplots(figsize=(8, 5))

    # Plot the distribution of anomaly scores from the non-anomaly dataset
    sns.histplot(non_anomaly_scores, kde=True, color='gray', label='Non-Anomaly Score Distribution', ax=hist_ax)

    # Mark the input data point's anomaly score
    hist_ax.axvline(x=your_anomaly_score, color='blue', linestyle='dashed', label='Your Data Point')

    # Set labels and title
    hist_ax.set_xlabel('Anomaly Score')
    hist_ax.set_ylabel('Frequency')
    hist_ax.set_title('Anomaly Score Distribution and Your Data Point')
    hist_ax.legend()
    # hist_ax.grid(True)

    # Display the histogram plot
    st.pyplot(hist_fig)
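    # Using an explicit Figure/Axes pair (rather than pyplot's implicit global
    # figure) keeps plot state from leaking between Streamlit reruns.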

    # Explain the results
    st.write("The input data point has been classified as an anomaly." if is_anomaly
             else "The input data point is not classified as an anomaly.")
    st.write("The anomaly score is:", your_anomaly_score)
    st.write("The threshold range for anomaly detection is:", min_threshold, "to", max_threshold)

    # Create a scatter plot for longitude and latitude
    fig, ax = plt.subplots(figsize=(10, 8))

    # Plot the non-anomaly data
    sns.scatterplot(data=non_anomaly_df, x='Long', y='Lat', color='lightgrey', label='Normal 🏙️', ax=ax)

    # Plot the input data point
    if is_anomaly:
        ax.scatter(selected_columns['Long'], selected_columns['Lat'], color='red', label='Suspicious 🚩', s=100, marker='x')
        anomaly_marker = 'Suspicious 🚩'
    else:
        ax.scatter(selected_columns['Long'], selected_columns['Lat'], color='green', label='Valid ✅', s=100, marker='o')
        anomaly_marker = 'Valid ✅'

    ax.set_xlabel("Longitude")
    ax.set_ylabel("Latitude")
    ax.set_title("Location Plot: Anomaly Detection 🗺️")
    ax.legend()
    ax.grid(True)

    # Show the scatter plot in Streamlit
    st.subheader("Location Plot: Anomaly Detection 🗺️")
    st.pyplot(fig)

    # Explanation based on the anomaly classification
    st.subheader("Anomaly Classification")
    if is_anomaly:
        st.write("Prediction Result: 🚨 Anomaly Detected!")
    else:
        st.write("Prediction Result: ✅ Not Anomaly")

    # Explain the results
    st.write("The location plot visualizes the anomaly detection result based on longitude and latitude.")
    if is_anomaly:
        st.write("The input data point is marked as Suspicious 🚩 due to its anomaly score.")
        st.write("The red 'x' marker indicates a suspicious location.")
    else:
        st.write("The input data point is marked as Valid ✅ due to its anomaly score.")
        st.write("The green 'o' marker indicates a valid location.")