Spaces:

MehulJ
/

what_is_this_image_about

Running

what_is_this_image_about / tool.py

Updated tool with improved error handling and documentation

e16e32d verified 3 months ago

1.62 kB

	from typing import Any, Optional
	from smolagents.tools import Tool
	import io
	import requests
	import transformers
	import PIL

	class ImageDescriptionTool(Tool):
	    """smolagents tool that captions the image found at a URL.

	    The image is fetched with ``requests``, opened with PIL, and passed to a
	    transformers ``image-to-text`` pipeline that loads the
	    ``Salesforce/blip-image-captioning-large`` checkpoint.
	    """

	    name = 'describe_image'
	    description = """
	Generates a detailed description of an image from a given URL.
	Uses the BLIP image captioning model to provide accurate descriptions.
	"""
	    inputs = {
	        "image_url": {
	            "type": "string",
	            "description": "URL of the image to be described (must be publicly accessible)",
	            "required": True,
	        },
	    }
	    output_type = 'string'
	    requirements = ["Pillow", "requests", "transformers", "torch"]

	    def __init__(self):
	        """Import the runtime dependencies and build the captioning pipeline.

	        The modules are imported inside the method and stored on the instance
	        so that ``forward`` only touches ``self`` attributes.
	        """
	        super().__init__()

	        # Function-scope imports: resolved when the tool is instantiated.
	        from io import BytesIO
	        import requests
	        from PIL import Image  # the PIL package is installed as "Pillow"
	        from transformers import pipeline

	        self.BytesIO = BytesIO
	        self.Image = Image
	        self.requests = requests

	        # The pipeline is created once here and reused by every forward() call.
	        captioner = pipeline("image-to-text", model="Salesforce/blip-image-captioning-large")
	        self.pipeline = captioner

	    def forward(self, image_url: str) -> str:
	        """Return a caption for the image at ``image_url``.

	        Args:
	            image_url: Address the image is downloaded from.

	        Returns:
	            ``"Description of the image: ..."`` on success. If any
	            ``Exception`` is raised while downloading, opening or captioning
	            the image, ``"Error processing image: ..."`` carrying the
	            exception text is returned instead of propagating it.
	        """
	        try:
	            # Fetch the raw bytes (10-second timeout); HTTP error statuses raise.
	            reply = self.requests.get(image_url, timeout=10)
	            reply.raise_for_status()

	            # Wrap the payload in an in-memory buffer and hand it to PIL.
	            buffer = self.BytesIO(reply.content)
	            picture = self.Image.open(buffer)

	            # The caption is read from the first entry of the pipeline output.
	            captions = self.pipeline(picture)
	            caption_text = captions[0]["generated_text"]

	            outcome = f"Description of the image: {caption_text}"
	        except Exception as err:
	            outcome = "Error processing image: " + str(err)
	        return outcome