import os
import gradio as gr
from transformers import pipeline

# Module-level BLIP captioning pipeline, created once when this file is loaded
pipe = pipeline(
    task="image-to-text",
    model="Salesforce/blip-image-captioning-base",
)

def launch(input):
    """
    Generate a caption for an image.

    Args:
    input (PIL.Image): Input image for captioning. May be None when no
        image was supplied.

    Returns:
    str: Generated caption for the input image, or an empty string when
        no image was supplied.
    """
    # The Gradio image component passes None when nothing was uploaded;
    # return an empty caption rather than forwarding None to the pipeline.
    if input is None:
        return ""
    out = pipe(input)
    # Only the first result's generated text is surfaced to the caller.
    return out[0]['generated_text']

# Gradio UI: a PIL image is handed to `launch`, and the result is shown as text
iface = gr.Interface(fn=launch, inputs=gr.Image(type="pil"), outputs="text")

# Start serving the interface
iface.launch()