"""Streamlit app: collect penguin measurements in the sidebar for prediction."""

import pickle

import numpy as np
import pandas as pd
import streamlit as st
from sklearn.pipeline import Pipeline

# Load the model and the three label encoders stored together in one pickle.
# NOTE: unpickling can execute arbitrary code, so this file must come from a
# trusted source.
with open('model_penguin_706.pkl', 'rb') as file:
    model, species_encoder, island_encoder, sex_encoder = pickle.load(file)

# Streamlit app layout
st.title('Penguin Species Prediction')

# Create user input fields
st.sidebar.header('Input Features')

# Categorical inputs: the choices are the classes each encoder was fitted on,
# so every selectable value can be encoded later.
species = st.sidebar.selectbox('Species', species_encoder.classes_)
island = st.sidebar.selectbox('Island', island_encoder.classes_)
sex = st.sidebar.selectbox('Sex', sex_encoder.classes_)

# Numeric inputs: slider arguments are (label, min, max, initial value).
bill_length_mm = st.sidebar.slider('Bill Length (mm)', 30.0, 60.0, 45.0)
bill_depth_mm = st.sidebar.slider('Bill Depth (mm)', 10.0, 25.0, 18.0)
flipper_length_mm = st.sidebar.slider(
    'Flipper Length (mm)', 170.0, 240.0, 200.0
)
body_mass_g = st.sidebar.slider('Body Mass (g)', 2500.0, 6000.0, 4000.0)

# "Culmen" is the older name for the bill measurements, so the culmen columns
# mirror the slider values instead of fixed placeholder constants; otherwise
# these features would never reflect what the user entered.
# NOTE(review): confirm against the training data that culmen_* and bill_*
# really describe the same measurements for this model.
culmen_length_mm = bill_length_mm
culmen_depth_mm = bill_depth_mm

# Prepare the input data as a single-row frame. The column order below is
# intended to match what the model expects.
input_data = pd.DataFrame({
    'species': [species],
    'island': [island],
    'sex': [sex],
    'bill_length_mm': [bill_length_mm],
    'bill_depth_mm': [bill_depth_mm],
    'flipper_length_mm': [flipper_length_mm],
    'body_mass_g': [body_mass_g],
    'culmen_length_mm': [culmen_length_mm],
    'culmen_depth_mm': [culmen_depth_mm],
})

# Guard against missing values: warn the user and replace them with 0 so the
# downstream steps always receive a fully populated row.
if input_data.isna().any().any():
    st.warning("Input data contains NaN values. Filling with default values.")
    input_data = input_data.fillna(0)
# Apply the label encoders to the categorical columns so they hold the
# integer codes produced by each encoder instead of raw strings.
for column, encoder in (
    ('species', species_encoder),
    ('island', island_encoder),
    ('sex', sex_encoder),
):
    input_data[column] = encoder.transform(input_data[column])

# Make prediction. When the model is a scikit-learn Pipeline, predict() already
# runs every preprocessing step before the final estimator, so the input must
# not be passed through the preprocessor by hand first; doing so would
# preprocess the data twice.
prediction = model.predict(input_data)

# Show the result. inverse_transform returns an array with one label for the
# single input row; take that element so the label is displayed on its own
# rather than as a one-element array.
predicted_species = species_encoder.inverse_transform(prediction)[0]
st.write(f'Predicted Species: {predicted_species}')