# Spaces:
# Runtime error
# Runtime error
import gradio as gr | |
import pandas as pd | |
from sklearn.preprocessing import LabelEncoder | |
from sklearn.feature_selection import mutual_info_classif | |
from sklearn.feature_selection import chi2 | |
from sklearn import metrics | |
from sklearn.ensemble import AdaBoostClassifier | |
from aif360.datasets import BinaryLabelDataset | |
from aif360.metrics import BinaryLabelDatasetMetric | |
from aif360.algorithms.preprocessing.reweighing import Reweighing | |
from sklearn.metrics import classification_report | |
def _load_encoded_data():
    """Read ``emp_experience_data.csv`` and label-encode its categorical columns.

    Returns:
        A copy of the CSV contents as a DataFrame in which every column
        listed in ``categorical_column`` has been replaced by the integer
        codes produced by ``LabelEncoder``. Other columns are left as read.
    """
    df = pd.read_csv('emp_experience_data.csv')
    data_encoded = df.copy(deep=True)
    categorical_column = [
        'Attrition', 'Gender', 'BusinessTravel', 'Education',
        'EmployeeExperience', 'EmployeeFeedbackSentiments', 'Designation',
        'SalarySatisfaction', 'HealthBenefitsSatisfaction',
        'UHGDiscountProgramUsage', 'HealthConscious',
        'CareerPathSatisfaction', 'Region',
    ]
    # The encoder is re-fitted for every column, so codes are per-column.
    label_encoding = LabelEncoder()
    for col in categorical_column:
        data_encoded[col] = label_encoding.fit_transform(data_encoded[col])
    return data_encoded


def _top_feature_table(feature_scores, col_values, score_header, limit=10):
    """Build a table of the ``limit`` highest-scoring features.

    Args:
        feature_scores: One score per feature, aligned with ``col_values``.
        col_values: Feature names.
        score_header: Text used as the header of the score column.
        limit: Maximum number of features to include.

    Returns:
        A list of rows: a ``["Feature", score_header]`` header row followed
        by ``[feature_name, score]`` rows in descending score order.
    """
    ranked = sorted(zip(feature_scores, col_values), reverse=True)[:limit]
    return [["Feature", score_header]] + [[fname, score] for score, fname in ranked]


def _report_frame(true_labels, predicted_labels, split_name):
    """Return a classification report as a DataFrame tagged with ``split_name``.

    ``output_dict=True`` is required: the default string report cannot be
    passed to the ``pd.DataFrame`` constructor.
    """
    report = classification_report(true_labels, predicted_labels, output_dict=True)
    frame = pd.DataFrame(report).transpose()
    # Expose the report's row labels as a regular column so they survive
    # being rendered in a table without an index.
    frame.insert(0, "Label", frame.index)
    frame.insert(0, "Split", split_name)
    return frame.reset_index(drop=True)


def _adaboost_reports(data_encoded):
    """Reweigh the training rows by ``Gender``, fit AdaBoost, and report results.

    Rows 0-149 of the selected columns are used for training and rows
    150-197 for validation (plain positional slices, no shuffling).

    Args:
        data_encoded: The label-encoded DataFrame from ``_load_encoded_data``.

    Returns:
        A DataFrame with the classification report for the training split
        followed by the one for the validation split.
    """
    data_selected = data_encoded[[
        'EmployeeExperience', 'HealthBenefitsSatisfaction', 'SalarySatisfaction',
        'Designation', 'HealthConscious', 'EmployeeFeedbackSentiments',
        'Education', 'Gender', 'HoursOfTrainingAttendedLastYear',
        'InternalJobMovement', 'Attrition',
    ]]

    # The training frame keeps 'Attrition' because BinaryLabelDataset reads
    # the label column from the same DataFrame as the features.
    train_data = data_selected[0:150]
    validation_data = data_selected[150:198]
    validation_input_data = validation_data.drop(['Attrition'], axis=1)
    validation_target_data = validation_data['Attrition']

    privileged_groups = [{'Gender': 0}]
    unprivileged_groups = [{'Gender': 1}]
    favorable_label = 0
    unfavorable_label = 1

    # NOTE(review): `unprivileged_protected_attributes` is passed the group
    # dicts exactly as in the original code; confirm against the AIF360 docs
    # that this is the intended value for that parameter.
    BM_dataset = BinaryLabelDataset(favorable_label=favorable_label,
                                    unfavorable_label=unfavorable_label,
                                    df=train_data,
                                    label_names=['Attrition'],
                                    protected_attribute_names=['Gender'],
                                    unprivileged_protected_attributes=unprivileged_groups)

    RW = Reweighing(unprivileged_groups=unprivileged_groups,
                    privileged_groups=privileged_groups)
    RW.fit(BM_dataset)
    train_tf_dataset = RW.transform(BM_dataset)

    metric_transf_train = BinaryLabelDatasetMetric(train_tf_dataset,
                                                   unprivileged_groups=unprivileged_groups,
                                                   privileged_groups=privileged_groups)
    print("Difference in mean outcomes between unprivileged and privileged groups = %f"% metric_transf_train.mean_difference())

    # Flatten the label column to 1-D, which is the shape sklearn expects.
    train_labels = train_tf_dataset.labels.ravel()
    cls = AdaBoostClassifier(n_estimators=100)
    cls.fit(train_tf_dataset.features, train_labels,
            sample_weight=train_tf_dataset.instance_weights)

    df_train = _report_frame(train_labels,
                             cls.predict(train_tf_dataset.features),
                             "train")
    # Predict on a plain array, matching the array the model was fitted on.
    df_pred = _report_frame(validation_target_data,
                            cls.predict(validation_input_data.to_numpy()),
                            "validation")
    return pd.concat([df_train, df_pred], ignore_index=True)


def data_description(action_type):
    """Run the workflow step selected by ``action_type`` and return a table.

    The employee CSV is re-read and label-encoded on every call, and the
    pandas display limits are set as a side effect.

    Args:
        action_type: One of "Input Data", "Target Data",
            "Feature Selection By Mutual Information",
            "Feature Selection By Chi Square" or "AdaBoost Classifier".

    Returns:
        * "Input Data" / "Target Data": the first rows of the encoded
          features / the encoded ``Attrition`` column (DataFrames).
        * The two feature-selection options: a list of rows with a header
          row followed by the ten best-scoring features.
        * "AdaBoost Classifier": a DataFrame holding the training and
          validation classification reports.
        * Any other value: ``None``.
    """
    pd.options.display.max_columns = 25
    pd.options.display.max_rows = 10

    data_encoded = _load_encoded_data()
    input_data = data_encoded.drop(['Attrition'], axis=1)
    target_data = data_encoded[['Attrition']]
    col_values = list(input_data.columns.values)

    if action_type == "Input Data":
        return input_data.head()

    if action_type == "Target Data":
        return target_data.head()

    if action_type == "Feature Selection By Mutual Information":
        feature_scores = mutual_info_classif(input_data, target_data.values.ravel())
        return _top_feature_table(
            feature_scores, col_values,
            "Mutual Information (0: independent, 1: dependent)")

    if action_type == "Feature Selection By Chi Square":
        # chi2 returns (statistics, p-values); only the statistics are ranked.
        feature_scores = chi2(input_data, target_data.values.ravel())[0]
        return _top_feature_table(
            feature_scores, col_values,
            "Chi-Square (Frequency Distribution)")

    if action_type == "AdaBoost Classifier":
        return _adaboost_reports(data_encoded)

    # Unrecognised (or empty) selection: nothing to display.
    return None
# Single dropdown whose choices are the action names that
# `data_description` dispatches on.
inputs = [
    gr.Dropdown(
        [
            "Input Data",
            "Target Data",
            "Feature Selection By Mutual Information",
            "Feature Selection By Chi Square",
            "AdaBoost Classifier",
        ],
        label="Develop Data Models",
    )
]

# Every action is rendered in one table component.
outputs = [gr.DataFrame()]

demo = gr.Interface(
    fn=data_description,
    inputs=inputs,
    outputs=outputs,
    title="Employee-Experience: Model Development",
    # Gradio expects one of 'auto', 'manual' or 'never' here; the boolean
    # form is deprecated. 'never' disables flagging.
    allow_flagging="never",
)

if __name__ == "__main__":
    demo.launch()