MehulJ's picture
Updated tool with improved error handling and documentation
e16e32d verified
from typing import Any, Optional
from smolagents.tools import Tool
import io
import requests
import transformers
import PIL
class ImageDescriptionTool(Tool):
    """Tool that captions the image found at a URL.

    The image is downloaded with ``requests``, decoded with Pillow and passed
    to a ``transformers`` ``image-to-text`` pipeline backed by the
    ``Salesforce/blip-image-captioning-large`` checkpoint.
    """

    name = "describe_image"
    description = """
Generates a detailed description of an image from a given URL.
Uses the BLIP image captioning model to provide accurate descriptions.
"""
    inputs = {'image_url': {'type': 'string', 'description': 'URL of the image to be described (must be publicly accessible)', 'required': True}}
    output_type = "string"
    requirements = ['Pillow', 'requests', 'transformers', 'torch']

    def __init__(self):
        """Import the runtime dependencies and build the captioning pipeline.

        The imports are local so that a missing package fails here, when the
        tool is instantiated, and the resolved modules are kept on the
        instance for ``forward`` to use.
        """
        super().__init__()
        # Import dependencies here to ensure they're available
        import requests
        from PIL import Image  # PIL is provided by Pillow
        from io import BytesIO
        from transformers import pipeline

        self.requests = requests
        self.Image = Image
        self.BytesIO = BytesIO
        self.pipeline = pipeline("image-to-text", model="Salesforce/blip-image-captioning-large")

    def forward(self, image_url: str) -> str:
        """Download the image at ``image_url`` and return a caption for it.

        Args:
            image_url: URL of the image to describe.

        Returns:
            ``"Description of the image: <caption>"`` on success. On any
            failure (bad input, download error, undecodable image, model
            error) a string starting with ``"Error processing image:"`` is
            returned instead; this method does not raise.
        """
        # Reject unusable input before touching the network so the caller
        # gets a direct explanation rather than a low-level library message.
        if not isinstance(image_url, str) or not image_url.strip():
            return "Error processing image: image_url must be a non-empty string"

        try:
            # Download the image; the timeout keeps a stalled server from
            # blocking the call indefinitely.
            response = self.requests.get(image_url.strip(), timeout=10)
            response.raise_for_status()

            payload = response.content
            if not payload:
                return "Error processing image: the server returned an empty response"

            # Image.open raises if the payload is not a recognizable image.
            # The context manager releases the decoder's resources once the
            # caption has been generated.
            with self.Image.open(self.BytesIO(payload)) as image:
                results = self.pipeline(image)

            # Guard against an empty result so the caller does not see an
            # opaque "list index out of range" message.
            if not results or not results[0].get('generated_text'):
                return "Error processing image: the captioning model returned no description"

            description = results[0]['generated_text']
            return f"Description of the image: {description}"
        except Exception as e:
            # Deliberate catch-all: failures are reported as text, not raised.
            return f"Error processing image: {str(e)}"