import io
from typing import Any, Optional

import PIL
import requests
import transformers
from smolagents.tools import Tool


class ImageDescriptionTool(Tool):
    """Tool that downloads an image from a URL and captions it.

    The caption is produced by a ``transformers`` ``image-to-text`` pipeline
    loaded with the ``Salesforce/blip-image-captioning-large`` checkpoint.
    Failures are reported as a text message rather than raised, so the
    caller always receives a string.
    """

    name = "describe_image"
    description = """
    Generates a detailed description of an image from a given URL.
    Uses the BLIP image captioning model to provide accurate descriptions.
    """
    inputs = {
        'image_url': {
            'type': 'string',
            'description': 'URL of the image to be described (must be publicly accessible)',
            'required': True,
        }
    }
    output_type = "string"
    requirements = ['Pillow', 'requests', 'transformers', 'torch']

    def __init__(self):
        """Bind the runtime dependencies and load the captioning pipeline.

        The pipeline is created here, so constructing the tool loads the
        model before the first call to ``forward``.
        """
        super().__init__()
        # Import dependencies here to ensure they're available
        import requests
        from PIL import Image  # PIL is provided by Pillow
        from io import BytesIO
        from transformers import pipeline

        # Keep the imported names on the instance so ``forward`` does not
        # depend on module-level globals.
        self.requests = requests
        self.Image = Image
        self.BytesIO = BytesIO
        self.pipeline = pipeline("image-to-text", model="Salesforce/blip-image-captioning-large")

    def forward(self, image_url: str) -> str:
        """Download the image at *image_url* and return a caption for it.

        Args:
            image_url: HTTP(S) URL of the image. Surrounding whitespace is
                ignored.

        Returns:
            ``"Description of the image: <caption>"`` on success, otherwise
            a message starting with ``"Error processing image: "``. No
            exception derived from ``Exception`` propagates to the caller.
        """
        # Reject unusable input up front so the caller gets a clear message
        # instead of a low-level library error.
        if not isinstance(image_url, str) or not image_url.strip():
            return "Error processing image: image_url must be a non-empty string"
        url = image_url.strip()
        if not url.lower().startswith(("http://", "https://")):
            return "Error processing image: image_url must start with http:// or https://"

        try:
            # Download the image (10 second timeout); HTTP error statuses
            # are turned into exceptions by raise_for_status().
            response = self.requests.get(url, timeout=10)
            response.raise_for_status()

            # Open the downloaded bytes as an image and release the handle
            # once the model has consumed it.
            with self.Image.open(self.BytesIO(response.content)) as image:
                results = self.pipeline(image)

            # Guard the indexing below against an empty result list.
            if not results:
                return "Error processing image: the captioning model returned no description"

            description = results[0]['generated_text']
            return f"Description of the image: {description}"
        except Exception as e:
            # Tool boundary: report the failure as text instead of raising.
            return f"Error processing image: {e}"