from typing import Any, Optional
from smolagents.tools import Tool
import io
import requests
import transformers
import PIL

class ImageDescriptionTool(Tool):
    """Tool that produces a text caption for an image fetched from a URL.

    The image is downloaded with ``requests``, decoded with Pillow and handed
    to a ``transformers`` ``image-to-text`` pipeline that uses the
    ``Salesforce/blip-image-captioning-large`` checkpoint. Failures are
    reported to the caller as an ``"Error processing image: ..."`` string
    instead of being raised.
    """

    # Tool metadata read by the ``Tool`` base class
    # (presumably used to describe the tool to an agent -- confirm in smolagents).
    name = "describe_image"
    description = """
    Generates a detailed description of an image from a given URL.
    Uses the BLIP image captioning model to provide accurate descriptions.
    """
    inputs = {'image_url': {'type': 'string', 'description': 'URL of the image to be described (must be publicly accessible)', 'required': True}}
    output_type = "string"
    requirements = ['Pillow', 'requests', 'transformers', 'torch']

    def __init__(self):
        """Build the captioning pipeline and keep handles to the helper libraries.

        The third-party modules are imported here and stored on the instance so
        that ``forward`` only relies on ``self``. Constructing the pipeline
        happens eagerly, so instantiation is the expensive step.
        """
        super().__init__()
        # Import dependencies here to ensure they're available
        import requests
        from PIL import Image  # PIL is provided by Pillow
        from io import BytesIO
        from transformers import pipeline

        self.requests = requests
        self.Image = Image
        self.BytesIO = BytesIO
        self.pipeline = pipeline("image-to-text", model="Salesforce/blip-image-captioning-large")

    def forward(self, image_url: str) -> str:
        """Download the image at ``image_url`` and return a generated caption.

        Args:
            image_url: ``http://`` or ``https://`` URL of the image.

        Returns:
            ``"Description of the image: <caption>"`` on success; otherwise a
            message starting with ``"Error processing image: "`` describing
            what went wrong. No exception derived from ``Exception`` escapes.
        """
        # Fail fast with a readable message instead of relying on the HTTP
        # library to reject missing or unsupported schemes.
        if not isinstance(image_url, str) or not image_url.strip():
            return "Error processing image: image_url must be a non-empty string"

        url = image_url.strip()
        if not url.lower().startswith(("http://", "https://")):
            return "Error processing image: only http:// and https:// URLs are supported"

        try:
            # Download the image; the timeout keeps an unresponsive host from
            # blocking the caller indefinitely.
            response = self.requests.get(url, timeout=10)
            response.raise_for_status()

            # Image.open is lazy and keeps the buffer open, so scope the image
            # with a context manager and run the model while it is still open.
            with self.Image.open(self.BytesIO(response.content)) as image:
                results = self.pipeline(image)

            if not results:
                return "Error processing image: the captioning model returned no output"

            description = results[0]['generated_text']
            return f"Description of the image: {description}"
        except Exception as e:
            # Deliberate catch-all at the tool boundary: download, decode and
            # inference failures are all returned as text to the caller.
            return f"Error processing image: {str(e)}"