import streamlit as st
import torch
from transformers import AutoProcessor, PaliGemmaForConditionalGeneration
from PIL import Image

# Hugging Face Hub identifier handed to ``from_pretrained`` below
model_id = "brucewayne0459/paligemma_derm"

# Run on the GPU when PyTorch can see one, otherwise on the CPU
device = "cuda" if torch.cuda.is_available() else "cpu"


@st.cache_resource
def _load_processor_and_model(checkpoint, target_device):
    """Load the processor and model for *checkpoint* once per server process.

    Streamlit re-executes this script from top to bottom on every widget
    interaction. Without caching, the weights would be read from disk and
    moved to the device again on each rerun. ``st.cache_resource`` keeps the
    returned objects alive and hands the same instances back on later runs.

    Args:
        checkpoint: Model identifier passed to ``from_pretrained``.
        target_device: Device string the model is moved to ("cuda" or "cpu").

    Returns:
        A ``(processor, model)`` tuple; the model is in eval mode and already
        placed on ``target_device``.
    """
    loaded_processor = AutoProcessor.from_pretrained(checkpoint)
    loaded_model = PaliGemmaForConditionalGeneration.from_pretrained(checkpoint)
    loaded_model.eval()
    loaded_model.to(target_device)
    return loaded_processor, loaded_model


# Module-level names used by the rest of the script
processor, model = _load_processor_and_model(model_id, device)

# Centered Hugging Face logo shown at the very top of the page.
# The markup carries its own <style> rules, so it has to be rendered as raw
# HTML (hence unsafe_allow_html=True below).
_LOGO_MARKUP = """
    <style>
    .huggingface-logo {
        display: flex;
        justify-content: center;
        margin-bottom: 20px;
    }
    .huggingface-logo img {
        width: 150px;
    }
    </style>
    <div class="huggingface-logo">
        <img src="https://huggingface.co/front/assets/huggingface_logo-noborder.svg" alt="Hugging Face Logo">
    </div>
    """
st.markdown(_LOGO_MARKUP, unsafe_allow_html=True)

# Page heading followed by a one-line usage hint
st.title("VisionDerm")
st.write("Upload an image or use your camera to identify the skin condition.")

# Two columns in a 3:2 width ratio; the left one holds the input widgets
col1, col2 = st.columns([3, 2])

# Fixed question passed to the processor together with every image
prompt = 'Identify the skin condition?'

# Image sources, both placed in the left column: a file upload and a camera capture
uploaded_file = col1.file_uploader("Choose an image...", type=["jpg", "jpeg", "png"])
camera_photo = col1.camera_input("Take a photo")

# Choose the input image: a camera capture takes precedence over an upload.
# `input_image` stays None when nothing was provided or the file is unreadable.
input_image = None
selected_source = camera_photo or uploaded_file
if selected_source:
    try:
        # Image.open is lazy; convert() forces the actual decode so a corrupt
        # or truncated file fails here instead of later during resize.
        # Converting to RGB also normalizes PNGs that carry an alpha channel
        # or a palette before they are resized and handed to the processor.
        input_image = Image.open(selected_source).convert("RGB")
    except OSError as exc:
        # PIL's UnidentifiedImageError is an OSError subclass, so this covers
        # both "not an image" and truncated/corrupt data.
        st.error(f"Could not read the selected image: {exc}")

with col2:
    # Explicit None check: the block only runs once an image has been selected
    if input_image is not None:
        # Show a fixed-size preview of the selected image
        resized_image = input_image.resize((300, 300))
        st.image(resized_image, caption="Selected Image (300x300)", use_container_width=True)

        # Resize image for processing (512x512 pixels)
        max_size = (512, 512)
        processed_image = input_image.resize(max_size)

        # Predict automatically when the image is uploaded or captured
        with st.spinner("Processing..."):
            try:
                # Tokenize the prompt and preprocess the image into model inputs
                inputs = processor(
                    text=prompt,
                    images=processed_image,
                    return_tensors="pt",
                    padding="longest"
                ).to(device)

                # Number of prompt tokens (image placeholders + text) fed to
                # the model; generate() returns them in front of the answer.
                prompt_token_count = inputs["input_ids"].shape[-1]

                # Generate output
                default_max_tokens = 50  # Upper bound on newly generated tokens
                with torch.no_grad():
                    outputs = model.generate(**inputs, max_new_tokens=default_max_tokens)

                # Decode only the newly generated tokens. Slicing by token
                # count is more reliable than removing the prompt with
                # str.replace, which relies on the decoded prompt matching the
                # original text exactly and would also delete the prompt text
                # if it appeared inside the answer.
                generated_tokens = outputs[0][prompt_token_count:]
                decoded_output = processor.decode(
                    generated_tokens, skip_special_tokens=True
                ).strip()

                # Capitalize the first letter of each word
                decoded_output = decoded_output.title()

                # Display result
                st.success("Analysis Complete!")
                st.write("**Model Output:**", decoded_output)

            except Exception as e:
                # UI boundary: report any failure in the page instead of
                # letting the script abort with a traceback.
                st.error(f"Error: {e}")

# Footer: a horizontal rule, then the team roster rendered inside an info box
_TEAM_ROSTER = """
### Team: Mahasigma Berprestasi
- **Muhammad Karov Ardava Barus** ; 103052300001
- **Akmal Yaasir Fauzaan** ; 103052300008
- **Farand Diy Dat Mahazalfaa** ; 103052300050
- **Hauzan Rafi Attallah**; 103052330011
"""
st.markdown("---")  # Visual separator between the app body and the footer
st.info(_TEAM_ROSTER)