import streamlit as st
import pickle
import pandas as pd
from sklearn.pipeline import Pipeline
import numpy as np

# Read the pickled bundle: the trained model followed by the encoders for
# species, island and sex (in that order).
# NOTE: unpickling can execute arbitrary code -- only load a trusted file.
with open('model_penguin_706.pkl', 'rb') as model_file:
    bundle = pickle.load(model_file)
model, species_encoder, island_encoder, sex_encoder = bundle

# Page title
st.title('Penguin Species Prediction')

# Every input widget is placed in the sidebar
sidebar = st.sidebar
sidebar.header('Input Features')

# Categorical inputs: the choices offered are the classes known to each encoder
species = sidebar.selectbox('Species', options=species_encoder.classes_)
island = sidebar.selectbox('Island', options=island_encoder.classes_)
sex = sidebar.selectbox('Sex', options=sex_encoder.classes_)

# Numeric inputs: each slider is given its lower bound, upper bound and
# initial value
bill_length_mm = sidebar.slider(
    'Bill Length (mm)', min_value=30.0, max_value=60.0, value=45.0
)
bill_depth_mm = sidebar.slider(
    'Bill Depth (mm)', min_value=10.0, max_value=25.0, value=18.0
)
flipper_length_mm = sidebar.slider(
    'Flipper Length (mm)', min_value=170.0, max_value=240.0, value=200.0
)
body_mass_g = sidebar.slider(
    'Body Mass (g)', min_value=2500.0, max_value=6000.0, value=4000.0
)

# "Culmen" is another name for the bill measurement, so the culmen columns
# mirror the bill sliders instead of fixed placeholder values. That way the
# user's input reaches the model whichever of the two column names it reads.
# NOTE(review): confirm against the training data which columns the model
# actually uses, and drop the unused pair once that is known.
culmen_length_mm = bill_length_mm
culmen_depth_mm = bill_depth_mm

# Assemble a single-row frame holding every feature; the column order below is
# the order in which the columns are handed to the model.
input_data = pd.DataFrame({
    'species': [species],
    'island': [island],
    'sex': [sex],
    'bill_length_mm': [bill_length_mm],
    'bill_depth_mm': [bill_depth_mm],
    'flipper_length_mm': [flipper_length_mm],
    'body_mass_g': [body_mass_g],
    'culmen_length_mm': [culmen_length_mm],
    'culmen_depth_mm': [culmen_depth_mm],
})

# Guard against missing values: warn the user and substitute 0 for any NaN
has_missing = input_data.isna().values.any()
if has_missing:
    st.warning("Input data contains NaN values. Filling with default values.")
    input_data = input_data.fillna(0)

# Replace each categorical column with the output of its encoder's transform
for column_name, encoder in (
    ('species', species_encoder),
    ('island', island_encoder),
    ('sex', sex_encoder),
):
    input_data[column_name] = encoder.transform(input_data[column_name])

# Make prediction.
# The encoded frame is passed to `model.predict` as-is: when `model` is a
# scikit-learn Pipeline, `predict` already runs every step before the final
# estimator (including a 'preprocessor' step), so transforming the data here
# first would apply that step twice.
prediction = model.predict(input_data)

# Map the predicted code back through the species encoder and show the single
# value rather than the repr of a one-element array.
predicted_species = species_encoder.inverse_transform(prediction)[0]
st.write(f'Predicted Species: {predicted_species}')