MehulJ's picture
Updated tool with improved error handling and documentation
e16e32d verified
raw
history blame
1.62 kB
from typing import Any, Optional
from smolagents.tools import Tool
import io
import requests
import transformers
import PIL
class ImageDescriptionTool(Tool):
    """Tool that produces a text caption for an image fetched from a URL.

    The image is downloaded with ``requests``, decoded with Pillow and handed
    to a ``transformers`` ``image-to-text`` pipeline built on the
    ``Salesforce/blip-image-captioning-large`` checkpoint.
    """

    name = "describe_image"
    description = (
        "\n"
        "Generates a detailed description of an image from a given URL.\n"
        "Uses the BLIP image captioning model to provide accurate descriptions.\n"
    )
    inputs = {
        'image_url': {
            'type': 'string',
            'description': 'URL of the image to be described (must be publicly accessible)',
            'required': True,
        },
    }
    output_type = "string"
    requirements = ['Pillow', 'requests', 'transformers', 'torch']

    def __init__(self):
        """Bind the helper modules to the instance and build the pipeline.

        The imports happen inside the constructor and their results are kept
        as instance attributes, so ``forward`` reaches everything it needs
        through ``self``.
        """
        super().__init__()

        import io
        import requests as http_client
        from PIL import Image as pil_image  # the PIL package is shipped by Pillow
        from transformers import pipeline as make_pipeline

        self.requests = http_client
        self.Image = pil_image
        self.BytesIO = io.BytesIO
        self.pipeline = make_pipeline(
            "image-to-text",
            model="Salesforce/blip-image-captioning-large",
        )

    def forward(self, image_url: str) -> str:
        """Download the image at ``image_url`` and return a caption for it.

        Args:
            image_url: Address the image is fetched from.

        Returns:
            ``"Description of the image: ..."`` on success. If anything raises
            while fetching, decoding or captioning, the exception is caught
            and ``"Error processing image: ..."`` is returned instead.
        """
        try:
            # Fetch the payload; give up after 10 seconds and treat HTTP
            # error statuses as failures.
            reply = self.requests.get(image_url, timeout=10)
            reply.raise_for_status()

            # Wrap the downloaded bytes in a file-like object for Pillow.
            payload = self.BytesIO(reply.content)
            picture = self.Image.open(payload)

            # Use the generated text of the first pipeline result.
            results = self.pipeline(picture)
            caption = results[0]['generated_text']
            return f"Description of the image: {caption}"
        except Exception as err:
            # Failures are reported to the caller as text, not re-raised.
            return f"Error processing image: {err!s}"