montebello-642 committed on
Commit
6b561c6
1 Parent(s): 246de3f

Initial commit

Browse files
Files changed (1) hide show
  1. app.py +120 -0
app.py ADDED
@@ -0,0 +1,120 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ from sklearn.model_selection import train_test_split, cross_val_score
3
+ from sklearn.preprocessing import StandardScaler
4
+ from sklearn.linear_model import LogisticRegression
5
+ from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
6
+ import seaborn as sns
7
+ import matplotlib.pyplot as plt
8
+ import gradio as gr
9
+
10
# Load the dataset, keeping only the columns the model needs.
selected_columns = ['duration_mo', 'mos_ethnicity', 'complainant_ethnicity', 'is_force', 'is_abuse_of_authority', 'is_discourtesy', 'is_offensive_language', 'outcome_description']
# read_csv ignores the order of `usecols` and keeps the file's own column
# order. The prediction function assembles its feature row in the order
# listed above, so re-index explicitly to make the training columns match.
df = pd.read_csv('my_dataset_logistic.csv', usecols=selected_columns)[selected_columns]

# Quick inspection of what was loaded
print(df.columns)
print(df.head())
print(df.describe())
print(df.isnull().sum())

# Separate the features from the target column; missing feature values become 0
X = df.drop('outcome_description', axis=1)
y = df['outcome_description']
X = X.fillna(0)

# Split into training and test set (80% / 20%, fixed seed for reproducibility)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize the features; the scaler is fitted on the training split only
# and then reused for the test split (and later for user input)
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Train the model
model = LogisticRegression(random_state=42)
model.fit(X_train_scaled, y_train)

# Make predictions on the held-out split and report the accuracy
y_pred = model.predict(X_test_scaled)
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy:.2f}')
40
+
41
# Classification report, confusion matrix, correlation graphs and the
# standard deviation of all the variables
print(classification_report(y_test, y_pred))

# Confusion Matrix
# confusion_matrix orders its rows/columns by sorted label, whereas
# Series.unique() returns labels in order of appearance. Pin the label order
# explicitly and reuse it for the tick labels so the axes cannot be mislabelled.
class_labels = model.classes_
conf_matrix = confusion_matrix(y_test, y_pred, labels=class_labels)
plt.figure(figsize=(8, 6))
sns.heatmap(conf_matrix, annot=True, fmt="d", cmap="Blues", cbar=False, xticklabels=class_labels, yticklabels=class_labels)
plt.title("Confusion Matrix")
plt.xlabel("Predicted")
plt.ylabel("Actual")
plt.show()

# Correlation Matrix
correlation_matrix = df.corr()
plt.figure(figsize=(10, 8))
sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm', fmt=".2f", linewidths=.5)
plt.title('Correlation Matrix')
plt.show()

# Bar chart of each feature's correlation with the target, strongest first
# (the target's correlation with itself is dropped)
target_correlations = correlation_matrix['outcome_description'].sort_values(ascending=False)
plt.figure(figsize=(10, 6))
target_correlations.drop('outcome_description').plot(kind='bar', color='blue')
plt.title('Correlations with Target Variable')
plt.xlabel('Features')
plt.ylabel('Correlation')
plt.show()

# Standard Deviation
std_dev = df.std()
print('\nStandard deviation')
print(std_dev)
73
+
74
# Gradio implementation
# Both ethnicity dropdowns offer the same choices; define them once and give
# each dropdown its own list.
_ethnicity_choices = (
    "Hispanic",
    "White",
    "Black",
    "Asian",
    "American Indian",
    "Other Race",
    "Refused",
    "Unknown",
)
mos_ethnicity_options = list(_ethnicity_choices)
complainant_ethnicity_options = list(_ethnicity_choices)
78
+
79
+ #defining the function to make predictions using the model
80
def predict_outcome_duration(mos_ethnicity, complainant_ethnicity, is_force, is_abuse_of_authority, is_discourtesy, is_offensive_language, duration_mo):
    """Predict the complaint outcome for one set of form values.

    Args:
        mos_ethnicity: One of the strings in ``mos_ethnicity_options``.
        complainant_ethnicity: One of the strings in
            ``complainant_ethnicity_options``.
        is_force: Checkbox value, converted with ``int()``.
        is_abuse_of_authority: Checkbox value, converted with ``int()``.
        is_discourtesy: Checkbox value, converted with ``int()``.
        is_offensive_language: Checkbox value, converted with ``int()``.
        duration_mo: Duration in months.

    Returns:
        ``"Arrest"`` when the model predicts class 1, ``"No Arrest"`` for any
        other prediction, or ``"Error: <message>"`` if anything fails.
    """
    try:
        # Ethnicities are encoded as their position in the option lists.
        # NOTE(review): confirm this matches the integer encoding used in the
        # training CSV.
        features = {
            'duration_mo': duration_mo,
            'mos_ethnicity': mos_ethnicity_options.index(mos_ethnicity),
            'complainant_ethnicity': complainant_ethnicity_options.index(complainant_ethnicity),
            'is_force': int(is_force),
            'is_abuse_of_authority': int(is_abuse_of_authority),
            'is_discourtesy': int(is_discourtesy),
            'is_offensive_language': int(is_offensive_language),
        }

        # Build a named one-row frame and order it like the training features.
        # The scaler and model are positional, and the training column order
        # comes from the CSV, so a hand-ordered list could silently misalign.
        input_frame = pd.DataFrame([features])[list(X.columns)]
        input_scaled = scaler.transform(input_frame)
        prediction = model.predict(input_scaled)[0]

        # Output the result
        return "Arrest" if prediction == 1 else "No Arrest"

    except Exception as e:
        # UI boundary: show the problem in the output box instead of crashing
        # the interface (e.g. a dropdown with nothing selected yet).
        return f"Error: {str(e)}"
101
+
102
# Create the Gradio interface: dropdowns for the two ethnicities, checkboxes
# for the type of allegation, and a slider for the duration in months
mos_ethnicity_dropdown = gr.Dropdown(choices=mos_ethnicity_options, label="Defendant Ethnicity")
complainant_ethnicity_dropdown = gr.Dropdown(choices=complainant_ethnicity_options, label="Complainant Ethnicity")
is_force_checkbox = gr.Checkbox()
is_abuse_of_authority_checkbox = gr.Checkbox()
is_discourtesy_checkbox = gr.Checkbox()
is_offensive_language_checkbox = gr.Checkbox()
duration_mo_slider = gr.Slider(minimum=0, maximum=20, label="Duration in months")

# Gradio passes input values to `fn` positionally, so this list must follow
# the parameter order of predict_outcome_duration (mos_ethnicity first, then
# complainant_ethnicity); otherwise the two ethnicities are swapped.
iface = gr.Interface(
    fn=predict_outcome_duration,
    inputs=[
        mos_ethnicity_dropdown,
        complainant_ethnicity_dropdown,
        is_force_checkbox,
        is_abuse_of_authority_checkbox,
        is_discourtesy_checkbox,
        is_offensive_language_checkbox,
        duration_mo_slider,
    ],
    outputs="text",
    live=True,
    title="Complaint Outcome Prediction"
)

# Launch the Gradio Interface
iface.launch(share=True)