Azarthehulk committed on
Commit
0b73a08
1 Parent(s): 467a62c

Upload 21075A6603-DecisioN_TREE.ipynb

Browse files
Files changed (1) hide show
  1. 21075A6603-DecisioN_TREE.ipynb +263 -0
21075A6603-DecisioN_TREE.ipynb ADDED
@@ -0,0 +1,263 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "markdown",
5
+ "id": "ab540ee7",
6
+ "metadata": {},
7
+ "source": [
8
+ "# Decision Tree"
9
+ ]
10
+ },
11
+ {
12
+ "cell_type": "code",
13
+ "execution_count": 2,
14
+ "id": "92d3ce84",
15
+ "metadata": {},
16
+ "outputs": [],
17
+ "source": [
18
+ "from sklearn.metrics import confusion_matrix\n",
19
+ "from sklearn.model_selection import train_test_split\n",
20
+ "from sklearn.tree import DecisionTreeClassifier\n",
21
+ "from sklearn.metrics import accuracy_score\n",
22
+ "from sklearn.metrics import classification_report\n",
23
+ "from sklearn.datasets import load_iris\n",
24
+ "iris=load_iris()"
25
+ ]
26
+ },
27
+ {
28
+ "cell_type": "code",
29
+ "execution_count": 3,
30
+ "id": "dd4c544d",
31
+ "metadata": {},
32
+ "outputs": [],
33
+ "source": [
34
+ "X,y=iris.data,iris.target"
35
+ ]
36
+ },
37
+ {
38
+ "cell_type": "code",
39
+ "execution_count": 6,
40
+ "id": "abe99084",
41
+ "metadata": {},
42
+ "outputs": [],
43
+ "source": [
44
+ "def train_using_gini(X_train, y_train):\n",
45
+ " clf_gini = DecisionTreeClassifier(criterion = \"gini\", random_state = 100,max_depth=3, min_samples_leaf=4)\n",
46
+ " clf_gini.fit(X_train, y_train)\n",
47
+ " return clf_gini"
48
+ ]
49
+ },
50
+ {
51
+ "cell_type": "code",
52
+ "execution_count": 7,
53
+ "id": "3e9ddda5",
54
+ "metadata": {},
55
+ "outputs": [],
56
+ "source": [
57
+ "#Using Entropy\n",
58
+ "def train_using_entropy(X_train,y_train):\n",
59
+ "#Creating a classifier object\n",
60
+ " clf_entropy = DecisionTreeClassifier(criterion=\"entropy\",random_state = 100,max_depth=3,min_samples_leaf=4)\n",
61
+ "#Training\n",
62
+ " clf_entropy.fit(X_train,y_train)\n",
63
+ " return clf_entropy"
64
+ ]
65
+ },
66
+ {
67
+ "cell_type": "code",
68
+ "execution_count": 8,
69
+ "id": "74fd9b39",
70
+ "metadata": {},
71
+ "outputs": [],
72
+ "source": [
73
+ "def prediction(X_test,clf_object):\n",
74
+ " y_pred=clf_object.predict(X_test)\n",
75
+ " print(\"Predicted values:\",y_pred)\n",
76
+ " return y_pred"
77
+ ]
78
+ },
79
+ {
80
+ "cell_type": "code",
81
+ "execution_count": 9,
82
+ "id": "0b47818b",
83
+ "metadata": {},
84
+ "outputs": [],
85
+ "source": [
86
+ "#Function to calculate accuracy\n",
87
+ "def cal_accuracy(y_test,y_pred):\n",
88
+ " print(\"Confusion Matrix: \",confusion_matrix(y_test,y_pred))\n",
89
+ " print(\"Accuracy:\",accuracy_score(y_test,y_pred)*100)\n",
90
+ " print(\"Report :\",classification_report(y_test,y_pred))"
91
+ ]
92
+ },
93
+ {
94
+ "cell_type": "code",
95
+ "execution_count": 10,
96
+ "id": "0f94ba7d",
97
+ "metadata": {},
98
+ "outputs": [
99
+ {
100
+ "name": "stdout",
101
+ "output_type": "stream",
102
+ "text": [
103
+ "Dimensions for training data (105, 4)\n",
104
+ "Dimensions for testing data (105,)\n"
105
+ ]
106
+ }
107
+ ],
108
+ "source": [
109
+ "X_train, X_test, y_train, y_test = train_test_split( X, y, test_size = 0.3, random_state = 100)\n",
110
+ "print(\"Dimensions for training data\",X_train.shape)\n",
111
+ "print(\"Dimensions for testing data\",y_train.shape)"
112
+ ]
113
+ },
114
+ {
115
+ "cell_type": "code",
116
+ "execution_count": 13,
117
+ "id": "a7ed365c",
118
+ "metadata": {},
119
+ "outputs": [
120
+ {
121
+ "name": "stdout",
122
+ "output_type": "stream",
123
+ "text": [
124
+ "Results Using Gini Index:\n",
125
+ "Predicted values: [2 0 2 0 2 2 0 0 2 0 0 2 0 0 2 1 1 2 2 2 2 0 2 0 1 2 1 0 1 2 1 1 1 0 0 1 0\n",
126
+ " 1 2 2 0 1 2 2 0]\n",
127
+ "Confusion Matrix: [[16 0 0]\n",
128
+ " [ 0 10 1]\n",
129
+ " [ 0 1 17]]\n",
130
+ "Accuracy: 95.55555555555556\n",
131
+ "Report : precision recall f1-score support\n",
132
+ "\n",
133
+ " 0 1.00 1.00 1.00 16\n",
134
+ " 1 0.91 0.91 0.91 11\n",
135
+ " 2 0.94 0.94 0.94 18\n",
136
+ "\n",
137
+ " accuracy 0.96 45\n",
138
+ " macro avg 0.95 0.95 0.95 45\n",
139
+ "weighted avg 0.96 0.96 0.96 45\n",
140
+ "\n"
141
+ ]
142
+ }
143
+ ],
144
+ "source": [
145
+ "#Gini Index\n",
146
+ "clf_gini = train_using_gini(X_train, y_train)\n",
147
+ "print(\"Results Using Gini Index:\")\n",
148
+ "# Prediction using gini\n",
149
+ "y_pred_gini = prediction(X_test, clf_gini)\n",
150
+ "cal_accuracy(y_test, y_pred_gini)"
151
+ ]
152
+ },
153
+ {
154
+ "cell_type": "code",
155
+ "execution_count": 14,
156
+ "id": "0cd3759c",
157
+ "metadata": {},
158
+ "outputs": [
159
+ {
160
+ "name": "stdout",
161
+ "output_type": "stream",
162
+ "text": [
163
+ "Predicted values: [2 0 2 0 2 2 0 0 2 0 0 2 0 0 2 1 1 2 2 2 2 0 2 0 1 2 1 0 1 2 1 1 1 0 0 1 0\n",
164
+ " 1 2 2 0 1 2 2 0]\n",
165
+ "Confusion Matrix: [[16 0 0]\n",
166
+ " [ 0 10 1]\n",
167
+ " [ 0 1 17]]\n",
168
+ "Accuracy: 95.55555555555556\n",
169
+ "Report : precision recall f1-score support\n",
170
+ "\n",
171
+ " 0 1.00 1.00 1.00 16\n",
172
+ " 1 0.91 0.91 0.91 11\n",
173
+ " 2 0.94 0.94 0.94 18\n",
174
+ "\n",
175
+ " accuracy 0.96 45\n",
176
+ " macro avg 0.95 0.95 0.95 45\n",
177
+ "weighted avg 0.96 0.96 0.96 45\n",
178
+ "\n"
179
+ ]
180
+ }
181
+ ],
182
+ "source": [
183
+ "#Analysing Metrics using entropy\n",
184
+ "clf_entropy = train_using_entropy(X_train,y_train)\n",
185
+ "# Prediction using entropy\n",
186
+ "y_pred_entropy = prediction(X_test, clf_entropy)\n",
187
+ "cal_accuracy(y_test, y_pred_entropy)"
188
+ ]
189
+ },
190
+ {
191
+ "cell_type": "code",
192
+ "execution_count": 19,
193
+ "id": "bfb36a8a",
194
+ "metadata": {},
195
+ "outputs": [
196
+ {
197
+ "name": "stdout",
198
+ "output_type": "stream",
199
+ "text": [
200
+ "Results Using Gini Index:\n",
201
+ "Predicted values: [2 0 2 0 2 2 0 0 2 0 0 2 0 0 2 1 1 2 2 2 2 0 2 0 1 2 1 0 1 2 1 1 1 0 0 1 0\n",
202
+ " 1 2 2 0 1 2 2 0]\n",
203
+ "Confusion Matrix: [[16 0 0]\n",
204
+ " [ 0 10 1]\n",
205
+ " [ 0 1 17]]\n",
206
+ "Accuracy: 95.55555555555556\n",
207
+ "Report : precision recall f1-score support\n",
208
+ "\n",
209
+ " 0 1.00 1.00 1.00 16\n",
210
+ " 1 0.91 0.91 0.91 11\n",
211
+ " 2 0.94 0.94 0.94 18\n",
212
+ "\n",
213
+ " accuracy 0.96 45\n",
214
+ " macro avg 0.95 0.95 0.95 45\n",
215
+ "weighted avg 0.96 0.96 0.96 45\n",
216
+ "\n"
217
+ ]
218
+ }
219
+ ],
220
+ "source": [
221
+ "#Let's observe what the result will be if we change max_depth to 5 and min_samples_leaf to 3\n",
222
+ "def train_using_gini(X_train, y_train):\n",
223
+ " clf_gini = DecisionTreeClassifier(criterion = \"gini\", random_state = 150,max_depth=5, min_samples_leaf=3)\n",
224
+ " clf_gini.fit(X_train, y_train)\n",
225
+ " return clf_gini\n",
226
+ "clf_gini = train_using_gini(X_train, y_train)\n",
227
+ "print(\"Results Using Gini Index:\")\n",
228
+ "# Prediction using gini\n",
229
+ "y_pred_gini = prediction(X_test, clf_gini)\n",
230
+ "cal_accuracy(y_test, y_pred_gini)"
231
+ ]
232
+ },
233
+ {
234
+ "cell_type": "code",
235
+ "execution_count": null,
236
+ "id": "1ec89b9d",
237
+ "metadata": {},
238
+ "outputs": [],
239
+ "source": []
240
+ }
241
+ ],
242
+ "metadata": {
243
+ "kernelspec": {
244
+ "display_name": "Python 3 (ipykernel)",
245
+ "language": "python",
246
+ "name": "python3"
247
+ },
248
+ "language_info": {
249
+ "codemirror_mode": {
250
+ "name": "ipython",
251
+ "version": 3
252
+ },
253
+ "file_extension": ".py",
254
+ "mimetype": "text/x-python",
255
+ "name": "python",
256
+ "nbconvert_exporter": "python",
257
+ "pygments_lexer": "ipython3",
258
+ "version": "3.9.13"
259
+ }
260
+ },
261
+ "nbformat": 4,
262
+ "nbformat_minor": 5
263
+ }