Spaces:
Runtime error
Runtime error
Commit
·
9492b91
1
Parent(s):
ed5235b
Upload app.py
Browse files
app.py
ADDED
@@ -0,0 +1,526 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Import the Libraries
|
2 |
+
import streamlit as st
|
3 |
+
import pandas as pd
|
4 |
+
from sklearn.preprocessing import LabelEncoder
|
5 |
+
|
6 |
+
# Libraries for EDA
|
7 |
+
import pandas_profiling as pp
|
8 |
+
from streamlit_pandas_profiling import st_profile_report
|
9 |
+
import sweetviz as sv
|
10 |
+
import codecs
|
11 |
+
import streamlit.components.v1 as components
|
12 |
+
|
13 |
+
# Libraries for Model Validation and Test
|
14 |
+
from sklearn.tree import DecisionTreeClassifier
|
15 |
+
from sklearn.model_selection import train_test_split
|
16 |
+
from sklearn.metrics import classification_report , accuracy_score , f1_score, confusion_matrix
|
17 |
+
import matplotlib.pyplot as plt
|
18 |
+
import seaborn as sns
|
19 |
+
|
20 |
+
# Browser-tab title and icon for the whole app.
# (A wide layout / collapsed sidebar variant was left disabled by the author.)
st.set_page_config(
    page_title='Mushroom Classification',
    page_icon='mushroom1.jpg',
)

# Seed the per-session keys the pages read, without overwriting values that
# an earlier rerun of this script already stored.
for _state_key, _make_default in (
    ('data', pd.DataFrame),        # full training dataset
    ('data_label', pd.DataFrame),  # training dataset restricted to the model columns
    ('start', int),                # int() == 0
):
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _make_default()
|
31 |
+
|
32 |
+
|
33 |
+
|
34 |
+
def main_page():
    """Render the landing page and load the training dataset.

    The sidebar lets the user pick between the bundled ``mushrooms.csv`` and
    an uploaded CSV. The full frame is stored in ``st.session_state['data']``
    and the subset of columns used for modelling in
    ``st.session_state['data_label']``.

    When no usable file is available the stored frames are cleared, an error
    is shown and the script run is halted with ``st.stop()``.

    NOTE(review): the source this was restored from had lost its indentation;
    the dataset-selection code is assumed to belong to this page - confirm.
    """
    st.image('mushroom3.jpg')
    st.markdown("<h1 style='text-align: center;'>Mushroom Classification</h1>", unsafe_allow_html=True)
    st.write('---')
    st.session_state['start'] = 1

    # Columns used to train the model (the more important ones, based on
    # feature importance). 'class' is the target.
    use_cols = ['class', 'spore-print-color', 'gill-color', 'gill-size', 'stalk-root',
                'habitat', 'stalk-shape', 'odor', 'population']

    def change():
        """Forget any previously loaded dataset."""
        st.session_state['data'] = pd.DataFrame()
        st.session_state['data_label'] = pd.DataFrame()

    # Input DataSet used to train the model ---------------------------------
    st.sidebar.header('User Input Parameters')
    st.sidebar.write('---')
    data_file = st.sidebar.selectbox(
        label="DataSet for Model Training",
        options=['Default', 'Upload'],
        # Pass the function itself: `change()` would run it right here and
        # hand its return value (None) to the widget instead of a callback.
        on_change=change,
    )

    # Use the bundled file for training the model.
    if data_file == 'Default':
        st.subheader('Input DataFrame')
        data = pd.read_csv('mushrooms.csv')
        st.session_state['data'] = data
        st.dataframe(data)
        st.session_state['data_label'] = data[use_cols]

    # Use a file supplied by the user.
    if data_file == 'Upload':
        file = st.sidebar.file_uploader('Upload DataSet In "csv" formate', type='csv', key='a')

        if file is None:
            # Nothing uploaded (or the upload was removed): make sure the
            # other pages do not keep working on a stale dataset.
            change()
            st.error('Please Upload the file')
            st.stop()

        data = pd.read_csv(file)
        st.session_state['data'] = data
        st.subheader('Input DataSet')
        st.dataframe(data)

        try:
            data_label = data[use_cols]
        except KeyError:
            # At least one required column is missing from the upload.
            st.session_state['data_label'] = pd.DataFrame()
            st.error('Please Upload the correct file, your file must contain below columns')
            st.write(pd.DataFrame(use_cols, columns=['columns']))
            st.stop()
        else:
            st.session_state['data_label'] = data_label
|
90 |
+
|
91 |
+
# Input DataSet is Taken------------------------------------------------------------------------------------------------------------------------------------------------------
|
92 |
+
|
93 |
+
|
94 |
+
def eda():
    """Render the exploratory-data-analysis page.

    Shows either a Pandas Profiling or a Sweetviz report for
    ``st.session_state['data']`` and, on request, a Sweetviz comparison of
    the train/test split of the label-encoded ``st.session_state['data_label']``.

    Generated reports are kept in ``st.session_state`` ('pp_eda_report',
    'sw_eda_report') so they are built once per session, not on every rerun.

    NOTE(review): the cached reports are not invalidated when a different
    dataset is loaded later in the same session - TODO decide whether they
    should be.
    """
    st.image('mushroom21.jpg', 'Mushroom Classification')
    st.header('**EDA**')
    st.write('---')

    def st_display_sweetviz(report_html):
        """Return the text of the HTML report stored at ``report_html``."""
        # presumably Sweetviz writes its HTML as UTF-8 - verify; the context
        # manager guarantees the handle is closed.
        with open(report_html, 'r', encoding='utf-8') as report_file:
            return report_file.read()

    try:
        if 'pp_eda_report' not in st.session_state:
            st.session_state['pp_eda_report'] = None
        if 'sw_eda_report' not in st.session_state:
            st.session_state['sw_eda_report'] = None

        data = st.session_state['data']
        data_label = st.session_state['data_label']
        if data.empty or data_label.empty:
            # Nothing has been loaded on the main page yet.
            st.error('Go to **Main Page** and Upload the DataSet')
            return

        report_kind = st.selectbox('EDA', ['Pandas Profiling', 'Sweetviz'])

        # Pandas Profiling ==================================================
        if report_kind == 'Pandas Profiling':
            if st.session_state['pp_eda_report'] is None:
                st.session_state['pp_eda_report'] = pp.ProfileReport(
                    data, title="Pandas Profiling Report", explorative=True, dark_mode=True)

            st.subheader("Pandas Profiling EDA Report")
            st_profile_report(st.session_state['pp_eda_report'])

        # Sweetviz ==========================================================
        if report_kind == 'Sweetviz':
            if st.session_state['sw_eda_report'] is None:
                # Only analyse when there is no cached page; the analysis is
                # the expensive step and its output is only used here.
                report = sv.analyze(data)
                report.show_html('report.html', open_browser=False)
                st.session_state['sw_eda_report'] = st_display_sweetviz('report.html')

            st.subheader("Sweetviz EDA Report")
            components.html(st.session_state['sw_eda_report'], width=900, height=800, scrolling=True)

        # Train & Test data comparison ======================================
        if st.button('Generate Comparison Report b/w Train & Test'):
            # Built on demand: the report file is only read inside this branch.
            final_data = data_label.apply(LabelEncoder().fit_transform)

            # First column is the target, the rest are features.
            X_label = final_data.iloc[:, 1:]
            y_label = final_data.iloc[:, 0]
            Xl_train, Xl_test, _yl_train, _yl_test = train_test_split(
                X_label, y_label, test_size=0.2, random_state=0)

            compare_report = sv.compare([Xl_train, 'Train'], [Xl_test, 'Test'])
            compare_report.show_html('compare.html', open_browser=False)
            page_compare = st_display_sweetviz('compare.html')

            st.subheader("Comparison Report b/w Train & Test DataSet")
            components.html(page_compare, width=900, height=800, scrolling=True)

    except Exception:
        # `Exception` rather than a bare `except:` so BaseException-derived
        # control-flow signals are not swallowed.
        st.error('Go to **Main Page** and Upload the DataSet')
|
188 |
+
|
189 |
+
|
190 |
+
|
191 |
+
|
192 |
+
def _render_split_report(short_name, long_name, y_true, y_pred):
    """Show accuracy, F1, classification report and confusion matrix for one split.

    Parameters
    ----------
    short_name : str
        Label used in the score lines ('Train' / 'Test').
    long_name : str
        Label used in the headings ('Training' / 'Testing').
    y_true, y_pred :
        Actual and predicted encoded class labels of the split.
    """
    st.subheader(f'{long_name} Accuracy')
    st.write(f'{short_name} Accuracy Score : ', accuracy_score(y_true, y_pred))
    st.write(f'{short_name} F1 Score : ', f1_score(y_true, y_pred))

    st.text(f'Model Report on {long_name} DataSet:\n '
            + classification_report(y_true, y_pred, digits=4))

    # The fixed labels assume exactly two encoded classes.
    cm = pd.DataFrame(confusion_matrix(y_true, y_pred),
                      columns=['Edible', 'Poisonous'], index=['Edible', 'Poisonous'])

    sns.set_theme(style='dark')
    sns.set(rc={'axes.facecolor': '#282828', 'figure.facecolor': '#282828'})

    fig, ax = plt.subplots()
    sns.heatmap(cm, annot=True, fmt='.0f', ax=ax)

    # Work on the Axes object directly instead of pyplot's global "current
    # axes" state.
    ax.set_xlabel('Predictions', fontsize=18)
    ax.set_ylabel('Actuals', fontsize=18)
    ax.set_title('Confusion Matrix', fontsize=18)
    ax.xaxis.label.set_color('white')
    ax.yaxis.label.set_color('white')
    ax.title.set_color('white')
    ax.tick_params(colors='white')
    # The last axes of the figure is the heatmap's colour bar.
    ax.figure.axes[-1].tick_params(colors='white')

    st.write(fig)
    # Release the figure; otherwise every rerun leaves one more open figure.
    plt.close(fig)


def model_validation():
    """Render the model-validation page.

    Label-encodes ``st.session_state['data_label']``, holds out 20 % of the
    rows as a test set, trains a decision tree on the remainder and reports
    its metrics on both the training and the test split.
    """
    try:
        st.image('mushroom41.jpg', 'Mushroom Classification')
        st.header('**Model Validation**')
        st.write('---')

        data_label = st.session_state['data_label']
        if data_label.empty:
            # Nothing has been loaded on the main page yet.
            st.error('Go to **Main Page** and Upload the DataSet')
            return

        # Data preprocessing: label-encode every column independently ------
        final_data = data_label.apply(LabelEncoder().fit_transform)
        st.subheader('Encoded DataSet use for Train and Test')
        st.dataframe(final_data)

        # Model building ---------------------------------------------------
        # First column is the target, the rest are features.
        X_label = final_data.iloc[:, 1:]
        y_label = final_data.iloc[:, 0]

        Xl_train, Xl_test, yl_train, yl_test = train_test_split(
            X_label, y_label, test_size=0.2, random_state=0)

        tre = DecisionTreeClassifier(criterion='entropy', max_depth=8,
                                     min_samples_split=2, random_state=0)
        tre.fit(Xl_train, yl_train)

        # Model validation -------------------------------------------------
        _render_split_report('Train', 'Training', yl_train, tre.predict(Xl_train))

        # Vertical spacing between the two reports.
        st.text("")
        st.text("")
        st.write('#')

        _render_split_report('Test', 'Testing', yl_test, tre.predict(Xl_test))

    except Exception:
        # `Exception` rather than a bare `except:` so BaseException-derived
        # control-flow signals are not swallowed.
        st.error('Go to **Main Page** and Upload the DataSet')
|
340 |
+
|
341 |
+
|
342 |
+
|
343 |
+
|
344 |
+
|
345 |
+
|
346 |
+
|
347 |
+
def model_test():
    """Render the classification page.

    Trains a decision tree on the full training dataset held in
    ``st.session_state['data']`` and classifies the rows of a CSV uploaded
    through the sidebar, then shows the coloured predictions and their counts.

    The file to classify is encoded with the same per-column encoders that
    were fitted on the training data, so the codes fed to the model always
    match the ones it was trained on, whichever training dataset is loaded.

    Every failure is reported explicitly and followed by a hint to visit the
    main page. The function returns instead of calling ``st.stop()`` under a
    catch-all handler.
    """
    main_page_hint = 'Or Go to **Main Page** and Upload the DataSet'

    # Feature columns required in the file to classify; the training data
    # needs the same columns plus the 'class' target in first position.
    use_test_cols = ['spore-print-color', 'gill-color', 'gill-size', 'stalk-root',
                     'habitat', 'stalk-shape', 'odor', 'population']
    use_cols = ['class'] + use_test_cols

    # Encoded class -> display name; assumes the target encodes to 0/1 in
    # that order - TODO confirm for uploaded training sets.
    dt_class = {0: 'Edible', 1: 'Poisonous'}
    class_colors = {'Poisonous': 'tomato', 'Edible': 'green'}

    def color_df(clas):
        """Return the cell style for a predicted class name."""
        return f"background-color: {class_colors.get(clas, 'dimgrey')}"

    try:
        st.image('mushroom31.jpg', 'Mushroom Classification')
        st.header('**Model Prediction**')
        st.write('---')

        # Dataset used to train the model.
        data = st.session_state['data']

        # File to be classified --------------------------------------------
        st.sidebar.write('**Upload the Data you want to Classify**')
        test_file = st.sidebar.file_uploader('Upload DataSet In "csv" formate', type='csv', key='b')

        if test_file is None:
            st.error('Please Upload the file')
            st.warning(main_page_hint)
            return

        test_data = pd.read_csv(test_file)
        st.subheader('DataSet to be Classify')
        st.dataframe(test_data)

        try:
            test_data = test_data[use_test_cols]
        except KeyError:
            # At least one required feature column is missing.
            st.error('File to be Classify is not correct, Please upload the correct file and your file contain below columns')
            st.write(pd.DataFrame(use_test_cols, columns=['columns']))
            st.warning(main_page_hint)
            return

        # Model building and training --------------------------------------
        # Raises KeyError when no valid training data is loaded; reported by
        # the handler at the bottom.
        data_label = data[use_cols]

        # One encoder per column, kept so the test file can reuse them.
        encoders = {col: LabelEncoder().fit(data_label[col]) for col in use_cols}
        final_data = pd.DataFrame(
            {col: encoders[col].transform(data_label[col]) for col in use_cols},
            index=data_label.index,
        )

        X_label = final_data.iloc[:, 1:]
        y_label = final_data.iloc[:, 0]

        tre = DecisionTreeClassifier(criterion='entropy', max_depth=8,
                                     min_samples_split=2, random_state=0)
        tre.fit(X_label, y_label)

        # Final prediction -------------------------------------------------
        test_label = pd.DataFrame(index=test_data.index)
        for col in use_test_cols:
            # Position in `classes_` is the code the encoder assigned.
            mapping = {value: code for code, value in enumerate(encoders[col].classes_)}
            test_label[col] = test_data[col].map(mapping)

        # Values the model never saw map to NaN and cannot be classified.
        unseen_cols = [col for col in use_test_cols if test_label[col].isna().any()]
        if unseen_cols:
            st.error('File to be Classify contains values not present in the training DataSet in below columns')
            st.write(pd.DataFrame(unseen_cols, columns=['columns']))
            st.warning(main_page_hint)
            return

        y_pred_tree = tre.predict(test_label)

        # Prediction frame: the test input plus the predicted class name.
        pred_data = test_data.copy()
        pred_data['class'] = y_pred_tree
        pred_data['class'] = pred_data['class'].map(dt_class)

        st.subheader('Classified Data or Output')
        st.dataframe(pred_data.style.applymap(color_df, subset=['class']))

        # Value counts of the predictions.
        class_counts = pred_data['class'].value_counts()
        value_counts = pd.DataFrame({
            'Mushroom_Classification': class_counts.index.tolist(),
            'Counts': class_counts.values.tolist(),
        })
        st.subheader('Value Counts of Classified Data')
        st.dataframe(value_counts.style.applymap(color_df, subset=['Mushroom_Classification']))

    except Exception:
        # `Exception` rather than a bare `except:` so BaseException-derived
        # control-flow signals are not swallowed.
        st.warning(main_page_hint)
|
481 |
+
|
482 |
+
|
483 |
+
def made_by():
    """Render the credits page: one column each for names, mails and phone numbers."""
    st.header('**Made By**')
    st.write('---')

    # (column heading, entries listed top to bottom), in display order.
    sections = (
        ("**Name**", ('Ayush Patidar', 'Anup Vetal', 'Farzan Nawaz',
                      'Prashant Khandekar', 'Prasad Waje')),
        ("**Mail**", ('[email protected]', '[email protected]', '[email protected]',
                      '[email protected]', '[email protected]')),
        ("**Mob. No.**", ('9131985346', '8668314822', '7898480467',
                          '7030870449', '8999714455')),
    )

    # Relative widths of the three columns.
    layout = st.columns([3, 6, 4])

    for column, (heading, entries) in zip(layout, sections):
        with column:
            st.subheader(heading)
            for entry in entries:
                st.write(entry)
|
512 |
+
|
513 |
+
|
514 |
+
# Sidebar navigation: page title -> function that renders the page.
# The order here is the order of the entries in the select box.
page_names_to_funcs = dict([
    ("Main Page", main_page),
    ("EDA", eda),
    ("Model Validation", model_validation),
    ("Data Classification", model_test),
    ("Made By", made_by),
])

st.sidebar.header('Select a Task')
st.sidebar.write('---')
selected_page = st.sidebar.selectbox("Select", page_names_to_funcs.keys())

# Render whichever page the user picked.
_render_selected_page = page_names_to_funcs[selected_page]
_render_selected_page()
|
526 |
+
|