Spaces:
Running
Running
from typing import Any, Optional | |
from smolagents.tools import Tool | |
import io | |
import requests | |
import transformers | |
import PIL | |
class ImageDescriptionTool(Tool):
    """Tool that downloads an image from a URL and returns a generated caption.

    The caption comes from a ``transformers`` ``image-to-text`` pipeline
    built on the ``Salesforce/blip-image-captioning-large`` checkpoint, which
    is created once in ``__init__`` and reused by every ``forward`` call.
    """

    name = "describe_image"
    # Body lines are deliberately unindented so the string carries no
    # leading whitespace on each line.
    description = """
Generates a detailed description of an image from a given URL.
Uses the BLIP image captioning model to provide accurate descriptions.
"""
    inputs = {
        'image_url': {
            'type': 'string',
            'description': 'URL of the image to be described (must be publicly accessible)',
            'required': True,
        },
    }
    output_type = "string"
    requirements = ['Pillow', 'requests', 'transformers', 'torch']

    def __init__(self):
        """Import the runtime dependencies and build the captioning pipeline.

        The imported modules/classes are stored on the instance so that
        ``forward`` reaches them through ``self`` rather than module globals.
        """
        super().__init__()

        # Dependencies are imported locally so they are resolved when the
        # tool is instantiated.
        import requests
        from PIL import Image  # PIL is provided by the Pillow distribution
        from io import BytesIO
        from transformers import pipeline

        self.BytesIO = BytesIO
        self.Image = Image
        self.requests = requests
        self.pipeline = pipeline(
            "image-to-text",
            model="Salesforce/blip-image-captioning-large",
        )

    def forward(self, image_url: str) -> str:
        """Fetch the image at ``image_url`` and return a caption for it.

        Args:
            image_url: URL the image is downloaded from (HTTP GET with
                ``timeout=10``).

        Returns:
            ``"Description of the image: <caption>"`` on success. If any
            ``Exception`` is raised while downloading, decoding or
            captioning, ``"Error processing image: <message>"`` is returned
            instead of propagating the error.
        """
        try:
            # Fetch the raw bytes; a non-success HTTP status raises here.
            reply = self.requests.get(image_url, timeout=10)
            reply.raise_for_status()

            # Decode the in-memory payload as an image.
            raw_bytes = self.BytesIO(reply.content)
            picture = self.Image.open(raw_bytes)

            # The pipeline yields a list; the caption text of its first
            # entry is stored under 'generated_text'.
            first_result = self.pipeline(picture)[0]
            caption = first_result['generated_text']
            return f"Description of the image: {caption}"
        except Exception as err:
            return f"Error processing image: {err!s}"