Paatiii1712 committed on
Commit
9492b91
·
1 Parent(s): ed5235b

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +526 -0
app.py ADDED
@@ -0,0 +1,526 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Import the Libraries
2
+ import streamlit as st
3
+ import pandas as pd
4
+ from sklearn.preprocessing import LabelEncoder
5
+
6
+ # Libraries for EDA
7
+ import pandas_profiling as pp
8
+ from streamlit_pandas_profiling import st_profile_report
9
+ import sweetviz as sv
10
+ import codecs
11
+ import streamlit.components.v1 as components
12
+
13
+ # Libraries for Model Validation and Test
14
+ from sklearn.tree import DecisionTreeClassifier
15
+ from sklearn.model_selection import train_test_split
16
+ from sklearn.metrics import classification_report , accuracy_score , f1_score, confusion_matrix
17
+ import matplotlib.pyplot as plt
18
+ import seaborn as sns
19
+
20
# Page-level configuration.
st.set_page_config(page_title='Mushroom Classification', page_icon='mushroom1.jpg')
# (layout="wide" and initial_sidebar_state="collapsed" are deliberately left unset.)

# Session-state entries shared between the pages, created only when absent:
#   'data'       -> the raw training DataFrame
#   'data_label' -> the subset of columns used for training
#   'start'      -> integer flag, initialised to 0 here
_SESSION_DEFAULTS = {
    'data': pd.DataFrame,
    'data_label': pd.DataFrame,
    'start': int,
}
for _state_key, _make_default in _SESSION_DEFAULTS.items():
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _make_default()
31
+
32
+
33
+
34
def main_page():
    """Render the landing page and load the dataset used to train the model.

    The sidebar lets the user choose between the bundled ``mushrooms.csv``
    ("Default") and an uploaded CSV ("Upload").  The raw frame is stored in
    ``st.session_state['data']`` and the training columns in
    ``st.session_state['data_label']`` so the other pages can use them.
    Execution is halted with ``st.stop()`` when no usable file is available.
    """
    st.image('mushroom3.jpg')
    st.markdown("<h1 style='text-align: center;'>Mushroom Classification</h1>", unsafe_allow_html=True)
    st.write('---')
    st.session_state['start'] = 1

    def change():
        """Forget any previously loaded dataset."""
        st.session_state['data'] = pd.DataFrame()
        st.session_state['data_label'] = pd.DataFrame()

    # Columns used to train the model (the more important ones, based on
    # feature importance).  'class' is the target and must stay first.
    use_cols = ['class', 'spore-print-color', 'gill-color', 'gill-size', 'stalk-root',
                'habitat', 'stalk-shape', 'odor', 'population']

    # Input DataSet Used to Train the Model ---------------------------------
    st.sidebar.header('User Input Parameters')
    st.sidebar.write('---')
    data_file = st.sidebar.selectbox(
        label="DataSet for Model Training",
        options=['Default', 'Upload'],
        # Pass the function itself.  The original wrote ``change()``, which
        # ran it during layout and registered ``None`` as the callback.
        on_change=change,
    )

    # Use the default file for training the model.
    if data_file == 'Default':
        st.subheader('Input DataFrame')
        data = pd.read_csv('mushrooms.csv')
        st.session_state['data'] = data
        st.dataframe(data)
        st.session_state['data_label'] = data[use_cols]
        return

    # Otherwise the user uploads another file.
    file = st.sidebar.file_uploader('Upload DataSet In "csv" formate', type='csv', key='a')

    if file is None:
        # Nothing uploaded (or the upload was removed): make sure the other
        # pages do not keep working on a stale dataset.
        change()
        st.error('Please Upload the file')
        st.stop()

    try:
        data = pd.read_csv(file)
    except (pd.errors.EmptyDataError, pd.errors.ParserError, UnicodeDecodeError):
        change()
        st.error('Please Upload the correct file, your file must contain below columns')
        st.write(pd.DataFrame(use_cols, columns=['columns']))
        st.stop()

    st.session_state['data'] = data
    st.subheader('Input DataSet')
    st.dataframe(data)

    # Explicit column check instead of a bare ``except`` around the lookup.
    missing_cols = [col for col in use_cols if col not in data.columns]
    if missing_cols:
        st.session_state['data_label'] = pd.DataFrame()
        st.error('Please Upload the correct file, your file must contain below columns')
        st.write(pd.DataFrame(use_cols, columns=['columns']))
        st.stop()

    st.session_state['data_label'] = data[use_cols]
    # Input DataSet is Taken ------------------------------------------------
92
+
93
+
94
def eda():
    """Render the exploratory-data-analysis page.

    Shows either a Pandas Profiling or a Sweetviz report for the dataset in
    ``st.session_state['data']`` and, on request, a Sweetviz comparison of
    the train and test splits of ``st.session_state['data_label']``.

    Generated reports are cached in ``st.session_state`` and discarded when
    the loaded dataset changes.
    """
    st.image('mushroom21.jpg', 'Mushroom Classification')
    st.header('**EDA**')
    st.write('---')

    def st_display_sweetviz(report_html):
        """Return the contents of the HTML report at *report_html*."""
        # NOTE(review): assumes Sweetviz writes its HTML as UTF-8 - confirm.
        with codecs.open(report_html, 'r', encoding='utf-8') as report_file:
            return report_file.read()

    data = st.session_state['data']
    if data.empty:
        st.error('Go to **Main Page** and Upload the DataSet')
        return

    # ``except Exception`` rather than a bare ``except``: a bare clause also
    # catches BaseException subclasses, which (as far as Streamlit's
    # implementation goes) include its stop/rerun control-flow signals.
    try:
        # Drop cached reports when the dataset is not the one they were
        # built from.
        signature = (
            data.shape,
            tuple(data.columns),
            int(pd.util.hash_pandas_object(data, index=False).sum()),
        )
        if ('eda_data_signature' not in st.session_state
                or st.session_state['eda_data_signature'] != signature):
            st.session_state['eda_data_signature'] = signature
            st.session_state['pp_eda_report'] = None
            st.session_state['sw_eda_report'] = None
        for cache_key in ('pp_eda_report', 'sw_eda_report'):
            if cache_key not in st.session_state:
                st.session_state[cache_key] = None

        report_kind = st.selectbox('EDA', ['Pandas Profiling', 'Sweetviz'])

        # Pandas Profiling ==================================================
        if report_kind == 'Pandas Profiling':
            if st.session_state['pp_eda_report'] is None:
                st.session_state['pp_eda_report'] = pp.ProfileReport(
                    data, title="Pandas Profiling Report", explorative=True, dark_mode=True)

            st.subheader("Pandas Profiling EDA Report")
            st_profile_report(st.session_state['pp_eda_report'])

        # Sweetviz ==========================================================
        elif report_kind == 'Sweetviz':
            # Only analyse when nothing is cached; the original re-ran the
            # analysis on every rerun even though it displayed the cache.
            if st.session_state['sw_eda_report'] is None:
                report = sv.analyze(data)
                report.show_html('report.html', open_browser=False)
                st.session_state['sw_eda_report'] = st_display_sweetviz('report.html')

            st.subheader("Sweetviz EDA Report")
            components.html(st.session_state['sw_eda_report'], width=900, height=800, scrolling=True)

        # Train & Test Data Comparison ======================================
        # Built only when the button is pressed, not on every render.
        if st.button('Generate Comparison Report b/w Train & Test'):
            data_label = st.session_state['data_label']
            if data_label.empty:
                st.error('Go to **Main Page** and Upload the DataSet')
                return

            # Same encoding and split as on the Model Validation page.
            label = LabelEncoder()
            final_data = data_label.apply(label.fit_transform)
            X_label = final_data.iloc[:, 1:]
            y_label = final_data.iloc[:, 0]
            Xl_train, Xl_test, yl_train, yl_test = train_test_split(
                X_label, y_label, test_size=0.2, random_state=0)

            compare_report = sv.compare([Xl_train, 'Train'], [Xl_test, 'Test'])
            compare_report.show_html('compare.html', open_browser=False)
            page_compare = st_display_sweetviz('compare.html')

            st.subheader("Comparison Report b/w Train & Test DataSet")
            components.html(page_compare, width=900, height=800, scrolling=True)

    except Exception as exc:
        st.error('Go to **Main Page** and Upload the DataSet')
        # Show the actual cause as well, so real failures are diagnosable.
        st.exception(exc)
188
+
189
+
190
+
191
+
192
def _show_confusion_matrix(y_true, y_pred):
    """Draw the confusion matrix of *y_true* vs *y_pred* as a dark heatmap."""
    cm = pd.DataFrame(confusion_matrix(y_true, y_pred),
                      columns=['Edible', 'Poisonous'], index=['Edible', 'Poisonous'])

    sns.set_theme(style='dark')
    sns.set(rc={'axes.facecolor': '#282828', 'figure.facecolor': '#282828'})

    fig, ax = plt.subplots()
    sns.heatmap(cm, annot=True, fmt='.0f', ax=ax)

    ax.set_xlabel('Predictions', fontsize=18)
    ax.set_ylabel('Actuals', fontsize=18)
    ax.set_title('Confusion Matrix', fontsize=18)
    ax.xaxis.label.set_color('white')
    ax.yaxis.label.set_color('white')
    ax.title.set_color('white')
    ax.tick_params(colors='white')
    # The last axes of the figure is the heatmap's colour bar.
    ax.figure.axes[-1].tick_params(colors='white')

    st.write(fig)
    # Release the figure once rendered; otherwise every rerun leaves two more
    # figures open in pyplot's registry.
    plt.close(fig)


def _show_split_metrics(heading, score_prefix, report_title, y_true, y_pred):
    """Write accuracy, F1, classification report and confusion matrix for one split."""
    st.subheader(heading)
    st.write(f'{score_prefix} Accuracy Score : ', accuracy_score(y_true, y_pred))
    st.write(f'{score_prefix} F1 Score : ', f1_score(y_true, y_pred))

    st.text(report_title + ':\n '
            + classification_report(y_true, y_pred, digits=4))

    _show_confusion_matrix(y_true, y_pred)


def model_validation():
    """Render the validation page.

    Label-encodes ``st.session_state['data_label']``, splits it 80/20 with a
    fixed seed, trains a decision tree and reports accuracy, F1, the
    classification report and the confusion matrix for both splits.
    """
    # ``except Exception`` rather than a bare ``except`` so that only real
    # errors are reported here.
    try:
        st.image('mushroom41.jpg', 'Mushroom Classification')
        st.header('**Model Validation**')
        st.write('---')

        data_label = st.session_state['data_label']
        if data_label.empty:
            st.error('Go to **Main Page** and Upload the DataSet')
            return

        # Data Preprocessing i.e. Label Encoding ----------------------------
        label = LabelEncoder()
        final_data = data_label.apply(label.fit_transform)
        st.subheader('Encoded DataSet use for Train and Test')
        st.dataframe(final_data)

        # Model Building ----------------------------------------------------
        # The first column is the target, the remaining ones are features.
        X_label = final_data.iloc[:, 1:]
        y_label = final_data.iloc[:, 0]

        Xl_train, Xl_test, yl_train, yl_test = train_test_split(
            X_label, y_label, test_size=0.2, random_state=0)

        tre = DecisionTreeClassifier(criterion='entropy', max_depth=8,
                                     min_samples_split=2, random_state=0)
        tre.fit(Xl_train, yl_train)

        # Model Validation --------------------------------------------------
        _show_split_metrics('Training Accuracy', 'Train',
                            'Model Report on Training DataSet',
                            yl_train, tre.predict(Xl_train))

        # Vertical spacing between the two sections.
        st.text("")
        st.text("")
        st.write('#')

        _show_split_metrics('Testing Accuracy', 'Test',
                            'Model Report on Testing DataSet',
                            yl_test, tre.predict(Xl_test))

    except Exception as exc:
        st.error('Go to **Main Page** and Upload the DataSet')
        # Show the actual cause as well, so real failures are diagnosable.
        st.exception(exc)
340
+
341
+
342
+
343
+
344
+
345
+
346
+
347
def model_test():
    """Render the classification page.

    Trains a decision tree on the dataset in ``st.session_state['data']`` and
    classifies the rows of a CSV uploaded through the sidebar.  The uploaded
    rows are encoded with the very same per-column encoders that were fitted
    on the training data, so the codes always agree with what the model was
    trained on.  Rows containing a category that never occurred in the
    training data cannot be encoded and are reported to the user.
    """
    main_page_hint = 'Or Go to **Main Page** and Upload the DataSet'

    def color_df(clas):
        """Return the cell background CSS for a class label."""
        if clas == 'Poisonous':
            color = 'tomato'
        elif clas == 'Edible':
            color = 'green'
        else:
            color = 'dimgrey'
        return f'background-color: {color}'

    st.image('mushroom31.jpg', 'Mushroom Classification')
    st.header('**Model Prediction**')
    st.write('---')

    # Input DataSet Used to Train the Model ---------------------------------
    data = st.session_state['data']

    # File to be Classify ---------------------------------------------------
    st.sidebar.write('**Upload the Data you want to Classify**')
    test_file = st.sidebar.file_uploader('Upload DataSet In "csv" formate', type='csv', key='b')

    # The st.stop() calls below sit outside any try block.  The original
    # wrapped them in a bare ``except``; the main-page hint is now written
    # explicitly before each stop.
    if test_file is None:
        st.error('Please Upload the file')
        st.warning(main_page_hint)
        st.stop()

    test_data = pd.read_csv(test_file)
    st.subheader('DataSet to be Classify')
    st.dataframe(test_data)

    # Columns that are used for the Test DataSet.
    use_test_cols = ['spore-print-color', 'gill-color', 'gill-size', 'stalk-root',
                     'habitat', 'stalk-shape', 'odor', 'population']

    if any(col not in test_data.columns for col in use_test_cols):
        st.error('File to be Classify is not correct, Please upload the correct file and your file contain below columns')
        st.write(pd.DataFrame(use_test_cols, columns=['columns']))
        st.warning(main_page_hint)
        st.stop()
    test_data = test_data[use_test_cols]

    # Columns used to Train the Model: target first, then the features.
    use_cols = ['class'] + use_test_cols
    if any(col not in data.columns for col in use_cols):
        # No (usable) training dataset has been loaded yet.
        st.warning(main_page_hint)
        st.stop()

    try:
        # Model Building and Training ---------------------------------------
        data_label = data[use_cols]

        # One encoder per column, kept so the test file can be encoded with
        # exactly the same category -> code assignment.
        encoders = {col: LabelEncoder().fit(data_label[col]) for col in use_cols}
        final_data = pd.DataFrame(
            {col: encoders[col].transform(data_label[col]) for col in use_cols},
            index=data_label.index,
        )

        X_label = final_data[use_test_cols]
        y_label = final_data['class']

        tre = DecisionTreeClassifier(criterion='entropy', max_depth=8,
                                     min_samples_split=2, random_state=0)
        tre.fit(X_label, y_label)

        # Encode the test file with the training categories -----------------
        test_label = pd.DataFrame(index=test_data.index)
        unseen = {}
        for col in use_test_cols:
            codes = {category: code for code, category in enumerate(encoders[col].classes_)}
            test_label[col] = test_data[col].map(codes)
            unknown = test_data.loc[test_label[col].isna(), col]
            if not unknown.empty:
                unseen[col] = sorted(unknown.astype(str).unique())
    except Exception as exc:
        st.warning(main_page_hint)
        st.exception(exc)
        return

    if unseen:
        # These values would otherwise reach the model as NaN.
        st.error('File to be Classify contains values that are not present in the training DataSet')
        st.write(pd.DataFrame({
            'columns': list(unseen),
            'unknown values': [', '.join(values) for values in unseen.values()],
        }))
        st.stop()

    try:
        # Final Prediction --------------------------------------------------
        y_pred_tree = tre.predict(test_label)

        # Prediction DataFrame with Test Input.
        pred_data = test_data.copy()
        pred_data['class'] = y_pred_tree
        pred_data['class'] = pred_data['class'].map({0: 'Edible', 1: 'Poisonous'})

        st.subheader('Classified Data or Output')
        st.dataframe(pred_data.style.applymap(color_df, subset=['class']))

        # Value Counts of Final Dataframe.
        class_counts = pred_data['class'].value_counts()
        value_counts = pd.DataFrame({
            'Mushroom_Classification': class_counts.index.tolist(),
            'Counts': class_counts.values.tolist(),
        })
        st.subheader('Value Counts of Classified Data')
        st.dataframe(value_counts.style.applymap(color_df, subset=['Mushroom_Classification']))
    except Exception as exc:
        st.warning(main_page_hint)
        st.exception(exc)
481
+
482
+
483
def made_by():
    """Render the credits page: names, mail addresses and phone numbers side by side."""
    st.header('**Made By**')
    st.write('---')
    name_col, mail_col, phone_col = st.columns([3, 6, 4])

    # One (column, heading, entries) triple per column, in display order.
    sections = (
        (name_col, "**Name**", (
            'Ayush Patidar',
            'Anup Vetal',
            'Farzan Nawaz',
            'Prashant Khandekar',
            'Prasad Waje',
        )),
        (mail_col, "**Mail**", (
            '[email protected]',
            '[email protected]',
            '[email protected]',
            '[email protected]',
            '[email protected]',
        )),
        (phone_col, "**Mob. No.**", (
            '9131985346',
            '8668314822',
            '7898480467',
            '7030870449',
            '8999714455',
        )),
    )

    for column, heading, entries in sections:
        with column:
            st.subheader(heading)
            for entry in entries:
                st.write(entry)
512
+
513
+
514
# Sidebar navigation: each page title maps to the function that renders it.
page_names_to_funcs = {
    "Main Page": main_page,
    "EDA": eda,
    "Model Validation": model_validation,
    "Data Classification": model_test,
    "Made By": made_by,
}

st.sidebar.header('Select a Task')
st.sidebar.write('---')
selected_page = st.sidebar.selectbox("Select", page_names_to_funcs.keys())

# Look up the renderer for the chosen page and run it.
render_selected_page = page_names_to_funcs[selected_page]
render_selected_page()
526
+