narinsak unawong committed on
Commit
fbb40ed
·
verified ·
1 Parent(s): 64afd26

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +27 -38
app.py CHANGED
@@ -6,10 +6,16 @@ from sklearn.preprocessing import StandardScaler
6
  from sklearn.neighbors import KNeighborsClassifier
7
  from sklearn.metrics import classification_report
8
 
9
- # Load your data (replace with your actual file path)
10
- df = pd.read_csv('penguins_lter.csv')
 
 
 
 
 
11
 
12
- # Data preprocessing (same as in your previous code)
 
13
  numeric_cols = df.select_dtypes(include=['number']).columns
14
  for col in numeric_cols:
15
  df[col].fillna(df[col].mean(), inplace=True)
@@ -18,56 +24,39 @@ categorical_cols = df.select_dtypes(exclude=['number']).columns
18
  for col in categorical_cols:
19
  df[col].fillna(df[col].mode()[0], inplace=True)
20
 
21
- # Feature Engineering and Model Training (same as in your previous code)
 
 
 
22
  X = df.drop('Species', axis=1)
23
  y = df['Species']
 
 
24
  X = pd.get_dummies(X, drop_first=True)
 
 
25
  X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
26
 
 
27
  pipeline = Pipeline([
28
  ('scaler', StandardScaler()),
29
  ('knn', KNeighborsClassifier(n_neighbors=5))
30
  ])
 
 
31
  pipeline.fit(X_train, y_train)
 
 
32
  y_pred = pipeline.predict(X_test)
33
- report = classification_report(y_test, y_pred, output_dict=True)
34
 
35
  # Streamlit app
36
  st.title("Penguin Species Classification")
37
 
38
- st.write("This app predicts the species of a penguin based on its features.")
39
 
40
- # Display the classification report
41
  st.subheader("Classification Report")
42
- st.write(pd.DataFrame(report).transpose())
43
-
44
-
45
- # Add input fields for user input (example)
46
- st.sidebar.header("Penguin Features")
47
-
48
- # Example input fields (replace with your actual features)
49
- bill_length_mm = st.sidebar.number_input("Bill Length (mm)", min_value=0.0, value=40.0)
50
- bill_depth_mm = st.sidebar.number_input("Bill Depth (mm)", min_value=0.0, value=15.0)
51
- # ... Add more input fields for other features ...
52
-
53
- #Create a dictionary to store the user inputs
54
- user_input_dict = {
55
- 'bill_length_mm': bill_length_mm,
56
- 'bill_depth_mm': bill_depth_mm,
57
- # ... Add other features here
58
- }
59
-
60
- # Create a dataframe for prediction
61
- user_input_df = pd.DataFrame([user_input_dict])
62
- user_input_df = pd.get_dummies(user_input_df, drop_first=True) # Apply the same one-hot encoding
63
-
64
-
65
- if st.sidebar.button("Predict"):
66
- # Align the columns of user_input_df and X_train
67
- missing_cols = set(X_train.columns) - set(user_input_df.columns)
68
- for c in missing_cols:
69
- user_input_df[c] = 0 # Add missing columns with value 0
70
- user_input_df = user_input_df[X_train.columns] # Reorder the columns
71
 
72
- prediction = pipeline.predict(user_input_df)
73
- st.write(f"Predicted Species: {prediction[0]}")
 
6
  from sklearn.neighbors import KNeighborsClassifier
7
  from sklearn.metrics import classification_report
8
 
9
+ # Load your data (replace with your actual data loading)
10
+ # Assuming you have a CSV file named 'penguins_lter.csv' in your working directory
11
+ try:
12
+ df = pd.read_csv('penguins_lter.csv')
13
+ except FileNotFoundError:
14
+ st.error("Error: 'penguins_lter.csv' not found. Please upload the file or adjust the path.")
15
+ st.stop()
16
 
17
+
18
+ # Data preprocessing (handle missing values)
19
  numeric_cols = df.select_dtypes(include=['number']).columns
20
  for col in numeric_cols:
21
  df[col].fillna(df[col].mean(), inplace=True)
 
24
  for col in categorical_cols:
25
  df[col].fillna(df[col].mode()[0], inplace=True)
26
 
27
+
28
+ # Model training and prediction (same as your original code)
29
+
30
+ # Assuming 'Species' is your target variable
31
  X = df.drop('Species', axis=1)
32
  y = df['Species']
33
+
34
+ # Convert categorical features to numerical using one-hot encoding
35
  X = pd.get_dummies(X, drop_first=True)
36
+
37
+ # Split data into training and testing sets
38
  X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
39
 
40
+ # Create a pipeline
41
  pipeline = Pipeline([
42
  ('scaler', StandardScaler()),
43
  ('knn', KNeighborsClassifier(n_neighbors=5))
44
  ])
45
+
46
+ # Train the pipeline
47
  pipeline.fit(X_train, y_train)
48
+
49
+ # Make predictions
50
  y_pred = pipeline.predict(X_test)
51
+
52
 
53
  # Streamlit app
54
  st.title("Penguin Species Classification")
55
 
56
+ st.write("This app predicts the species of a penguin based on its physical characteristics.")
57
 
58
+ # Display classification report
59
  st.subheader("Classification Report")
60
+ st.text(classification_report(y_test, y_pred))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
61
 
62
+ st.dataframe(df.head())