Spaces:
Runtime error
Runtime error
jaleesahmed
committed on
Commit
•
8f429ae
1
Parent(s):
49cb2d6
app
Browse files
- app.py +54 -1
- requirements.txt +3 -1
app.py
CHANGED
@@ -3,6 +3,12 @@ import pandas as pd
|
|
3 |
from sklearn.preprocessing import LabelEncoder
|
4 |
from sklearn.feature_selection import mutual_info_classif
|
5 |
from sklearn.feature_selection import chi2
|
|
|
|
|
|
|
|
|
|
|
|
|
6 |
|
7 |
def data_description(action_type):
|
8 |
df = pd.read_csv('emp_experience_data.csv')
|
@@ -34,9 +40,56 @@ def data_description(action_type):
|
|
34 |
for score, fname in sorted(zip(feature_scores, col_values), reverse=True)[:10]:
|
35 |
data.append([fname, score])
|
36 |
return data
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
37 |
|
38 |
inputs = [
|
39 |
-
gr.Dropdown(["Input Data", "Target Data", "Feature Selection By Mutual Information", "Feature Selection By Chi Square"], label="Develop Data Models")
|
40 |
]
|
41 |
|
42 |
outputs = [gr.DataFrame()]
|
|
|
3 |
from sklearn.preprocessing import LabelEncoder
|
4 |
from sklearn.feature_selection import mutual_info_classif
|
5 |
from sklearn.feature_selection import chi2
|
6 |
+
from sklearn import metrics
|
7 |
+
from sklearn.ensemble import AdaBoostClassifier
|
8 |
+
from aif360.datasets import BinaryLabelDataset
|
9 |
+
from aif360.metrics import BinaryLabelDatasetMetric
|
10 |
+
from aif360.algorithms.preprocessing.reweighing import Reweighing
|
11 |
+
from sklearn.metrics import classification_report
|
12 |
|
13 |
def data_description(action_type):
|
14 |
df = pd.read_csv('emp_experience_data.csv')
|
|
|
40 |
for score, fname in sorted(zip(feature_scores, col_values), reverse=True)[:10]:
|
41 |
data.append([fname, score])
|
42 |
return data
|
43 |
+
if action_type == "AdaBoost Classifier":
|
44 |
+
data_selected = data_encoded[['EmployeeExperience', 'HealthBenefitsSatisfaction', 'SalarySatisfaction', 'Designation', 'HealthConscious',
|
45 |
+
'EmployeeFeedbackSentiments', 'Education', 'Gender', 'HoursOfTrainingAttendedLastYear', 'InternalJobMovement', 'Attrition']]
|
46 |
+
|
47 |
+
input_data = data_selected.drop(['Attrition'], axis=1)
|
48 |
+
target_data = data_selected[['Attrition']]
|
49 |
+
input_data = data_selected[0:150]
|
50 |
+
validation_data = data_selected[150:198]
|
51 |
+
validation_input_data = validation_data.drop(['Attrition'], axis=1)
|
52 |
+
validation_target_data = validation_data[['Attrition']]
|
53 |
+
test_data = data_selected[198:]
|
54 |
+
test_input_data = test_data.drop(['Attrition'], axis=1)
|
55 |
+
test_target_data = test_data[['Attrition']]
|
56 |
+
privileged_groups = [{'Gender': 0}]
|
57 |
+
unprivileged_groups = [{'Gender': 1}]
|
58 |
+
favorable_label = 0
|
59 |
+
unfavorable_label = 1
|
60 |
+
BM_dataset = BinaryLabelDataset(favorable_label=favorable_label,
|
61 |
+
unfavorable_label=unfavorable_label,
|
62 |
+
df=input_data,
|
63 |
+
label_names=['Attrition'],
|
64 |
+
protected_attribute_names=['Gender'],
|
65 |
+
unprivileged_protected_attributes=unprivileged_groups)
|
66 |
+
metric_orig_train = BinaryLabelDatasetMetric(BM_dataset, unprivileged_groups=unprivileged_groups, privileged_groups=privileged_groups)
|
67 |
+
RW = Reweighing(unprivileged_groups=unprivileged_groups,
|
68 |
+
privileged_groups=privileged_groups)
|
69 |
+
RW.fit(BM_dataset)
|
70 |
+
train_tf_dataset = RW.transform(BM_dataset)
|
71 |
+
metric_orig_train = BinaryLabelDatasetMetric(train_tf_dataset,
|
72 |
+
unprivileged_groups=unprivileged_groups,
|
73 |
+
privileged_groups=privileged_groups)
|
74 |
+
|
75 |
+
print("Difference in mean outcomes between unprivileged and privileged groups = %f"% metric_orig_train.mean_difference())
|
76 |
+
|
77 |
+
estimator = [100]
|
78 |
+
for i in estimator:
|
79 |
+
cls = AdaBoostClassifier(n_estimators=i)
|
80 |
+
cls.fit(train_tf_dataset.features, train_tf_dataset.labels,sample_weight=train_tf_dataset.instance_weights)
|
81 |
+
|
82 |
+
predicted_output = cls.predict(train_tf_dataset.features)
|
83 |
+
accuracy = metrics.accuracy_score(train_tf_dataset.labels, predicted_output)
|
84 |
+
report = classification_report(train_tf_dataset.labels, predicted_output)
|
85 |
+
df_train = pd.DataFrame(report).transpose()
|
86 |
+
predicted_output = cls.predict(validation_input_data)
|
87 |
+
accuracy = metrics.accuracy_score(validation_target_data, predicted_output)
|
88 |
+
report_pred = classification_report(validation_target_data, predicted_output)
|
89 |
+
df_pred = pd.DataFrame(report_pred).transpose()
|
90 |
|
91 |
inputs = [
|
92 |
+
gr.Dropdown(["Input Data", "Target Data", "Feature Selection By Mutual Information", "Feature Selection By Chi Square", "AdaBoost Classifier"], label="Develop Data Models")
|
93 |
]
|
94 |
|
95 |
outputs = [gr.DataFrame()]
|
requirements.txt
CHANGED
@@ -1,2 +1,4 @@
|
|
1 |
cufflinks
|
2 |
-
sklearn
|
|
|
|
|
|
1 |
cufflinks
|
2 |
+
sklearn
|
3 |
+
aif360
|
4 |
+
'aif360[AdversarialDebiasing]'
|