from functools import lru_cache

import gradio as gr
from PIL import Image

from model import GitBaseCocoModel

CHECKPOINT = "microsoft/git-base-coco"
DEFAULT_MAX_LEN = 50
DEFAULT_NUM_CAPTIONS = 1


def _select_device() -> str:
    """Return "cuda" when a CUDA device is usable through torch, else "cpu"."""
    try:
        # Imported lazily so the module still imports on machines without torch.
        import torch
    except ImportError:
        return "cpu"
    return "cuda" if torch.cuda.is_available() else "cpu"


@lru_cache(maxsize=1)
def _get_model() -> GitBaseCocoModel:
    """
    Build the captioning model once and reuse it for every request.

    NOTE(review): assumes a GitBaseCocoModel instance can be reused across
    calls to `generate` -- confirm against model.py.
    """
    return GitBaseCocoModel(_select_device(), CHECKPOINT)


def _to_int(value, default: int) -> int:
    """Coerce a UI number (float or None) to int, using `default` for None."""
    return default if value is None else int(value)


def generate_captions(
    image: Image.Image,
    max_len: int = DEFAULT_MAX_LEN,
    num_captions: int = DEFAULT_NUM_CAPTIONS,
) -> str:
    """
    Generates captions for the given image.

    -----
    Parameters:
    image: PIL.Image.Image
        The image to generate captions for.
    max_len: int
        The maximum length of the caption. Floats (as sent by the numeric
        UI field) are truncated to int; None falls back to the default.
    num_captions: int
        The number of captions to generate. Coerced the same way as
        `max_len`.

    -----
    Returns:
    str
        The generated captions joined into one string, one per line.
    """
    max_len = _to_int(max_len, DEFAULT_MAX_LEN)
    num_captions = _to_int(num_captions, DEFAULT_NUM_CAPTIONS)

    captions = _get_model().generate(image, max_len, num_captions)

    # The UI has a single textbox, so the captions are joined by newlines.
    return "\n".join(captions)


# NOTE(review): `gr.inputs` / `gr.outputs` and `launch(enable_queue=...)` look
# like the legacy Gradio API -- confirm the pinned Gradio version before
# upgrading the dependency.
inputs = [
    gr.inputs.Image(type="pil", label="Image"),
    gr.inputs.Number(default=DEFAULT_MAX_LEN, label="Maximum Caption Length"),
    gr.inputs.Number(
        default=DEFAULT_NUM_CAPTIONS,
        label="Number of Captions to Generate",
    ),
]

# A single textbox shows every caption, however many were requested.
outputs = gr.outputs.Textbox(label="Captions")

title = "Git-Base-COCO Image Captioning"
description = "A model for generating captions for images."

interface = gr.Interface(
    fn=generate_captions,
    inputs=inputs,
    outputs=outputs,
    title=title,
    description=description,
)

if __name__ == "__main__":
    interface.launch(
        enable_queue=True,
    )