# NOTE: The lines that originally preceded this file were residue from a web
# file viewer, not program text: a "File size: 3,420 Bytes" banner, a blame
# gutter of commit hashes (2dd86d2, aecf454, 39bd8cf, 7a4032a, 65a57b1,
# 6bac9a5, 683046a) and the line-number column 1-94.
# Import necessary libraries
import streamlit as st
import pandas as pd
import numpy as np
import pickle
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
import matplotlib.pyplot as plt
from joblib import load


# Load the pre-trained artifacts once, when the script is executed. Both
# paths are relative, so the files must be in the current working directory.
# NOTE(review): pickle and joblib deserialization can execute arbitrary code;
# only load artifact files that come from a trusted source.
with open('standard_scaler.pkl', 'rb') as scaler_file:
    # `scaler` is used by predict_fraud() to transform the 'Amount' value.
    scaler = pickle.load(scaler_file)

# The classifier is loaded with joblib; an earlier pickle-based load of
# 'random_forest_model.pkl' was replaced by this call.
# `model` is used by predict_fraud() via predict() and predict_proba().
model = load('random_forest_model.joblib')

def predict_fraud(user_input):
    """Classify a transaction with the module-level ``scaler`` and ``model``.

    The ``'Amount'`` value is passed through ``scaler.transform`` and appended
    as the last column after all remaining feature columns, which keep their
    original order.

    Args:
        user_input: A pandas DataFrame (one row per transaction) or a single
            pandas Series, containing the feature values plus an ``'Amount'``
            entry.

    Returns:
        tuple: ``(prediction, probability)`` for the first row.
        ``prediction`` is the label returned by ``model.predict`` and
        ``probability`` is column 1 of ``model.predict_proba`` (presumably
        the fraud class; confirm against ``model.classes_``).

    Raises:
        KeyError: If ``user_input`` has no ``'Amount'`` entry.
    """
    # Series.drop(columns=...) leaves a Series unchanged, which would keep
    # 'Amount' among the features and then append it a second time. Promote a
    # Series to a one-row DataFrame so both input kinds share one code path.
    if isinstance(user_input, pd.Series):
        user_input = user_input.to_frame().T

    # Scale only the amount; the scaler receives a single 2-D column.
    amount_column = user_input['Amount'].to_numpy(dtype=float).reshape(-1, 1)
    amount_scaled = scaler.transform(amount_column)

    # Remaining features are passed through untouched.
    features = user_input.drop(columns=['Amount']).to_numpy(dtype=float)

    # Both parts are 2-D here, so the result is always a 2-D matrix.
    model_input = np.concatenate((features, amount_scaled), axis=1)

    prediction = model.predict(model_input)[0]
    probability = model.predict_proba(model_input)[0][1]
    return prediction, probability

def generate_charts(prediction, probability, amount):
    """Render the transaction amount and two summary charts in Streamlit.

    Args:
        prediction: Predicted label; 0 is treated as non-fraudulent and 1 as
            fraudulent.
        probability: Predicted probability of fraud, expected in [0, 1].
        amount: Transaction amount, echoed above the charts.

    Returns:
        None. The output is written to the Streamlit page.
    """
    fig, (pie_ax, bar_ax) = plt.subplots(1, 2, figsize=(12, 6))
    try:
        # Pie chart of the hard prediction: the whole pie goes to the
        # predicted class.
        prediction_labels = ['Non-Fraudulent', 'Fraudulent']
        prediction_values = [1 - prediction, prediction]
        pie_ax.pie(
            prediction_values,
            labels=prediction_labels,
            autopct='%1.1f%%',
            startangle=90,
            colors=['skyblue', 'lightcoral'],
        )
        pie_ax.set_title('Prediction Distribution')

        # Bar chart of the fraud probability, coloured by predicted class.
        bar_color = 'lightgreen' if prediction == 0 else 'lightcoral'
        bar_ax.bar(['Probability'], [probability], color=bar_color)
        # Pin the axis to the probability range; with autoscaling every bar
        # fills the plot and low and high probabilities look the same.
        bar_ax.set_ylim(0, 1)
        bar_ax.set_title('Probability of Fraud')

        # Display amount
        st.write(f"Transaction Amount: ${amount}")

        # Display charts
        st.pyplot(fig)
    finally:
        # pyplot keeps every figure it creates until it is closed; release it
        # so repeated reruns do not accumulate figures in memory.
        plt.close(fig)

def main():
    """Run the Streamlit page: collect one transaction and show its prediction.

    Renders number inputs for features V1..V28, a configurable amount range
    and the amount itself. When 'Predict' is pressed, the values are passed
    to ``predict_fraud`` and the result to ``generate_charts``.
    """
    st.title("Fraud Detection Prediction App")

    # One numeric input per feature, rendered in order V1..V28.
    feature_names = [f'V{i}' for i in range(1, 29)]
    feature_values = [st.number_input(name, value=0.0) for name in feature_names]

    min_amount = st.number_input('Minimum Amount', value=0.0)
    max_amount = st.number_input('Maximum Amount', value=10000.0)

    # With an inverted range the midpoint default lies below `min_value`,
    # which makes the amount widget raise; report it to the user instead.
    if min_amount > max_amount:
        st.error("Minimum Amount must not be greater than Maximum Amount.")
        return

    amount = st.number_input(
        'Amount',
        min_value=min_amount,
        max_value=max_amount,
        value=(min_amount + max_amount) / 2,
    )

    # Nothing more to do until the user asks for a prediction.
    if not st.button('Predict'):
        return

    # Single-row DataFrame: the feature values in order, then 'Amount'.
    user_input_df = pd.DataFrame([feature_values], columns=feature_names)
    user_input_df['Amount'] = amount
    prediction, probability = predict_fraud(user_input_df)

    # Display prediction result
    st.write(f"Prediction: {'Fraudulent Transaction' if prediction == 1 else 'Non-Fraudulent Transaction'}")
    st.write(f"Probability: {probability:.2%}")

    # Generate and display charts
    generate_charts(prediction, probability, amount)

# Build the page only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == "__main__":
    main()