Spaces:
Build error
Build error
LethallyHealthy
committed on
Commit
·
0b6328e
1
Parent(s):
2066208
Delete app.py
Browse files
app.py
DELETED
@@ -1,125 +0,0 @@
|
|
1 |
-
#import the necessary dependencies
|
2 |
-
import pandas as pd
|
3 |
-
import numpy as np
|
4 |
-
import lightgbm as lgb
|
5 |
-
from lightgbm.callback import early_stopping
|
6 |
-
|
7 |
-
from sklearn.ensemble import GradientBoostingRegressor
|
8 |
-
from sklearn.model_selection import train_test_split
|
9 |
-
from sklearn.metrics import mean_squared_error
|
10 |
-
from sklearn.metrics import r2_score
|
11 |
-
from sklearn.metrics import accuracy_score
|
12 |
-
|
13 |
-
# Read the training and test CSV files from the working directory.
train_data, test_data = map(pd.read_csv, ("train.csv", "test.csv"))
|
16 |
-
|
17 |
-
|
18 |
-
#clean the data
# cleanup_cats maps a column name to a {category label: integer code} table;
# it is passed to DataFrame.replace() below to encode both data frames.
# NOTE(review): pd.read_csv() parses the literal "NA" as missing by default
# (and "None" as well on pandas >= 2.0), so with the plain read_csv() calls above
# those keys presumably never match - confirm, or read with keep_default_na=False.
# NOTE(review): "Ex" is coded 0 in ExterQual/ExterCond/HeatingQC/KitchenQual/PoolQC
# but 5 in the Bsmt*/FireplaceQu/Garage* scales - confirm this is intended.
|
19 |
-
cleanup_cats = {"MSZoning": {"A": 1, "C (all)": 2, "FV": 3, "I": 4, "RH": 5, "RL": 6, "RP": 7, "RM": 0},
|
20 |
-
"Street": {"Grvl": 1, "Pave": 0},
|
21 |
-
"Alley" : {"NA": 1, "Grvl": 2, "Pave": 0},
|
22 |
-
"LotShape": {"Reg": 1, "IR1": 2, "IR2": 3, "IR3": 0},
|
23 |
-
"LandContour": {"Lvl": 1, "Bnk": 2, "HLS": 3, "Low": 0},
|
24 |
-
"Utilities": {"AllPub": 0, "NoSewr": 3, "NoSeWa" : 2, "ELO": 1},
|
25 |
-
"LotConfig": {"Inside": 1, "Corner": 2, "CulDSac": 3, "FR2": 4, "FR3": 0},
|
26 |
-
"LandSlope": {"Gtl": 1, "Mod": 2, "Sev": 0},
|
27 |
-
"Neighborhood": {"Blmngtn": 1, "Blueste": 2, "BrDale": 3, "BrkSide": 4, "ClearCr": 5, "CollgCr": 6, "Crawfor": 7, "Edwards" : 8, "Gilbert": 9, "IDOTRR": 10, "MeadowV": 11, "Mitchel": 12, "NAmes": 13, "NoRidge": 14, "NPkVill": 15, "NridgHt": 16, "NWAmes": 17, "OldTown": 18, "SWISU": 19, "Sawyer": 20, "SawyerW": 21, "Somerst": 22, "StoneBr": 23, "Timber": 24, "Veenker": 0},
|
28 |
-
"Condition1": {"Artery": 1, "Feedr": 2, "Norm": 3, "RRNn": 4, "RRAn": 5, "PosN": 6, "PosA": 7, "RRNe": 8, "RRAe": 0},
|
29 |
-
"Condition2": {"Artery": 1, "Feedr": 2, "Norm": 3, "RRNn": 4, "RRAn": 5, "PosN": 6, "PosA": 7, "RRNe": 8, "RRAe": 0},
|
30 |
-
"BldgType": {"1Fam": 1, "2fmCon": 2, "Duplex": 3, "TwnhsE": 4, "TwnhsI": 5, "Twnhs": 0},
|
31 |
-
"HouseStyle": {"1Story": 1, "1.5Fin": 2, "1.5Unf": 3, "2Story": 4, "2.5Fin": 5, "2.5Unf": 6, "SFoyer": 7, "SLvl": 0},
|
32 |
-
"RoofStyle": {"Flat": 1, "Gable": 2, "Gambrel": 3, "Hip": 4, "Mansard": 5, "Shed": 0},
|
33 |
-
"RoofMatl": {"ClyTile": 1, "CompShg": 2, "Membran": 3, "Metal": 4, "Roll": 5, "Tar&Grv": 6, "WdShake": 7, "WdShngl": 0},
|
34 |
-
"Exterior1st": {"AsbShng": 1, "AsphShn": 2, "BrkComm": 3, "BrkFace": 4, "CBlock": 5, "CemntBd": 6, "HdBoard": 7, "ImStucc": 8, "MetalSd": 9, "Other": 10, "Plywood": 11, "Precast": 12, "Stone": 13, "Stucco": 14, "VinylSd": 15, "WdShing": 16, "Wd Sdng": 0},
|
35 |
-
"Exterior2nd": {"AsbShng": 1, "AsphShn": 2, "Brk Cmn": 3, "BrkFace": 4, "CBlock": 5, "CmentBd": 6, "HdBoard": 7, "ImStucc": 8, "MetalSd": 9, "Other": 10, "Plywood": 11, "Precast": 12, "Stone": 13, "Stucco": 14, "VinylSd": 15, "Wd Shng": 16, "Wd Sdng": 0},
|
36 |
-
"MasVnrType": {"None": 1, "BrkCmn": 2, "BrkFace": 3, "CBlock": 4, "Stone": 0},
|
37 |
-
"ExterQual": {"Po": 1, "Fa": 2, "TA": 3, "Gd": 4, "Ex": 0},
|
38 |
-
"ExterCond": {"Po": 1, "Fa": 2, "TA": 3, "Gd": 4, "Ex": 0},
|
39 |
-
"Foundation": {"BrkTil": 1, "CBlock": 2, "PConc": 3, "Slab": 4, "Stone": 5, "Wood": 0},
|
40 |
-
"BsmtQual": {"Po": 1, "Fa": 2, "TA": 3, "Gd": 4, "Ex": 5, "NA": 0},
|
41 |
-
"BsmtCond": {"Po": 1, "Fa": 2, "TA": 3, "Gd": 4, "Ex": 5, "NA": 0},
|
42 |
-
"BsmtExposure": {"NA": 1, "No": 2, "Mn": 3, "Av": 4, "Gd": 0},
|
43 |
-
"BsmtFinType1": {"GLQ": 1, "ALQ": 2, "BLQ": 3, "Rec": 4, "LwQ": 5, "Unf": 6, "NA": 0},
|
44 |
-
"BsmtFinType2": {"GLQ": 1, "ALQ": 2, "BLQ": 3, "Rec": 4, "LwQ": 5, "Unf": 6, "NA": 0},
|
45 |
-
"Heating": {"Floor": 1, "GasA": 2, "GasW": 3, "Grav": 4, "OthW": 5, "Wall": 0},
|
46 |
-
"HeatingQC": {"Po": 1, "Fa": 2, "TA": 3, "Gd": 4, "Ex": 0},
|
47 |
-
"CentralAir": {"N": 0, "Y": 1},
|
48 |
-
"Electrical": {"SBrkr": 1, "FuseA": 2, "FuseF": 3, "FuseP": 4, "Mix": 0},
|
49 |
-
"KitchenQual": {"Po": 1, "Fa": 2, "TA": 3, "Gd": 4, "Ex": 0},
|
50 |
-
"Functional": {"Typ": 1, "Min1": 2, "Min2": 3, "Mod": 4, "Maj1": 5, "Maj2": 6, "Sev": 7, "Sal": 0},
|
51 |
-
"FireplaceQu": {"Po": 1, "Fa": 2, "TA": 3, "Gd": 4, "Ex": 5, "NA": 0},
|
52 |
-
"GarageType": {"2Types": 1, "Attchd": 2, "Basment": 3, "BuiltIn": 4, "CarPort": 5, "Detchd": 6, "NA": 0},
|
53 |
-
"GarageFinish": {"NA": 1, "Unf": 2, "RFn": 3, "Fin": 0},
|
54 |
-
"GarageQual": {"Po": 1, "Fa": 2, "TA": 3, "Gd": 4, "Ex": 5, "NA": 0},
|
55 |
-
"GarageCond": {"Po": 1, "Fa": 2, "TA": 3, "Gd": 4, "Ex": 5, "NA": 0},
|
56 |
-
"PavedDrive": {"N": 0, "P": 1, "Y": 2},
|
57 |
-
"PoolQC": {"Po": 1, "Fa": 2, "TA": 3, "Gd": 4, "Ex": 0},
|
58 |
-
"Fence": {"NA": 1, "MnWw": 2,"GdWo": 3, "MnPrv": 4, "GdPrv": 0},
|
59 |
-
"MiscFeature": {"Elev": 1, "Gar2": 2, "Othr": 3, "Shed": 4, "TenC": 5, "NA": 0},
|
60 |
-
"SaleType": {"WD": 1, "CWD": 2, "VWD": 3, "New": 4, "COD": 5, "Con": 6, "ConLw": 7, "ConLI": 8, "ConLD": 9, "Oth": 0},
|
61 |
-
"SaleCondition":{"Normal": 1, "Abnorml": 2, "AdjLand": 3, "Alloca": 4, "Family": 5, "Partial": 0}}
|
62 |
-
|
63 |
-
|
64 |
-
# Remove the "Id" column from both frames.
train_data = train_data.drop(columns="Id")
test_data = test_data.drop(columns="Id")

# Swap every category label listed in cleanup_cats for its integer code.
train_data = train_data.replace(cleanup_cats)
test_data = test_data.replace(cleanup_cats)

# Any column that still holds a missing value in the training frame is
# dropped; the same column list is then dropped from the test frame too.
removals = train_data.columns[train_data.isna().any()]
Train_data = train_data.drop(columns=removals)
Test_data = test_data.drop(columns=removals)
|
76 |
-
|
77 |
-
# Hold out 20% of the training rows (shuffled, fixed seed) for validation;
# "SalePrice" is the target and every other remaining column is a feature.
X_train, X_test, y_train, y_test = train_test_split(
    Train_data.drop(columns="SalePrice"),
    Train_data["SalePrice"],
    test_size=0.2,
    shuffle=True,
    random_state=42,
)

# Wrap both splits as LightGBM datasets.
training_data = lgb.Dataset(X_train, label=y_train)
valid_data = lgb.Dataset(X_test, label=y_test)
|
82 |
-
|
83 |
-
# Hyper-parameters for the LightGBM regressor (values taken from an Optuna run).
params = {
    "objective": "regression",
    "learning_rate": 0.08828308704850689,
    "num_leaves": 256,
    "max_depth": 11,
    "min_data_in_leaf": 100,
    "lambda_l1": 55,
    "lambda_l2": 60,
    "min_gain_to_split": 13.827512822883651,
    "bagging_fraction": 1.0,
    "bagging_freq": 1,
    "feature_fraction": 0.4,
}

# The round budget is passed as num_boost_round instead of an "n_estimators"
# key in params: lgb.train() honours that alias but logs a warning whenever
# it finds one. valid_sets is given as a list, the documented form.
# Training stops early once the validation score has not improved for
# 300 consecutive rounds.
model = lgb.train(
    params,
    training_data,
    num_boost_round=10000,
    valid_sets=[valid_data],
    callbacks=[early_stopping(300)],
)
|
103 |
-
def make_a_prediction(M, X):
    """Return the predictions of model ``M`` for the feature rows ``X``.

    Args:
        M: Any object exposing a ``predict`` method, e.g. a trained
            LightGBM booster.
        X: Feature data in whatever form ``M.predict`` accepts.

    Returns:
        Whatever ``M.predict(X)`` returns, unchanged.
    """
    return M.predict(X)
|
106 |
-
|
107 |
-
# Predict on the held-out split and show the raw values.
predictions = make_a_prediction(model, X_test)
print(predictions)

import shap

# SHAP attributions of the trained model on the held-out rows.
explainer = shap.TreeExplainer(model)
shap_values = explainer.shap_values(X_test)

# Initialise SHAP's JavaScript support for interactive plots; one call is enough.
shap.initjs()
shap.force_plot(explainer.expected_value, shap_values=shap_values, feature_names=X_test.columns)
shap.decision_plot(explainer.expected_value, shap_values, feature_names=np.array(X_test.columns))
shap.summary_plot(shap_values=shap_values, feature_names=X_test.columns)

# Pairwise SHAP interaction values. The first entry (presumably the matrix
# for the first held-out row) is rounded and printed, because a bare
# expression is silently discarded when this file runs as a script.
interaction_values = explainer.shap_interaction_values(X_test)
print(pd.DataFrame(interaction_values[0].round(2)).head(60))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|