from functools import lru_cache

import gradio as gr
import torch
from PIL import Image

from model import GitBaseCocoModel

CHECKPOINT = "microsoft/git-base-coco"
DEFAULT_MAX_LEN = 50
DEFAULT_NUM_CAPTIONS = 1


@lru_cache(maxsize=1)
def _load_model(device: str, checkpoint: str) -> GitBaseCocoModel:
    """Build the captioning model once and reuse it across requests."""
    return GitBaseCocoModel(device, checkpoint)


def _to_positive_int(value, fallback: int) -> int:
    """Coerce a Gradio Number value (a float, or None when empty) to an int >= 1."""
    if value is None:
        return fallback
    return max(1, int(value))


def generate_captions(
    image: Image.Image,
    max_len: int = DEFAULT_MAX_LEN,
    num_captions: int = DEFAULT_NUM_CAPTIONS,
):
    """
    Generates captions for the given image.

    -----
    Parameters:
    image: PIL.Image.Image
        The image to generate captions for.
    max_len: int
        The maximum length of the caption. Floats (as sent by the Gradio
        Number widget) are truncated to int; values below 1 are raised to 1.
    num_captions: int
        The number of captions to generate. Coerced like ``max_len``.

    -----
    Returns:
    Whatever ``GitBaseCocoModel.generate`` returns; presumably list[str]
    (one entry per caption) -- confirm against ``model.py``.
    """
    max_len = _to_positive_int(max_len, DEFAULT_MAX_LEN)
    num_captions = _to_positive_int(num_captions, DEFAULT_NUM_CAPTIONS)

    # The original read `gradio.use_gpu`, which is a NameError (the module is
    # imported as `gr`); ask torch directly whether CUDA is usable.
    device = "cuda" if torch.cuda.is_available() else "cpu"

    # Cached: constructing the model per request would reload the checkpoint.
    model = _load_model(device, CHECKPOINT)
    return model.generate(image, max_len, num_captions)


def _captions_as_text(image, max_len, num_captions) -> str:
    """UI adapter: render the generated captions as one caption per line."""
    captions = generate_captions(image, max_len, num_captions)
    if isinstance(captions, str):
        return captions
    return "\n".join(str(caption) for caption in captions)


# NOTE(review): gr.inputs / gr.outputs / enable_queue are the legacy Gradio
# API; they are kept as-is so the required Gradio version does not change.
inputs = [
    gr.inputs.Image(type="pil", label="Image"),
    gr.inputs.Number(default=DEFAULT_MAX_LEN, label="Maximum Caption Length"),
    gr.inputs.Number(
        default=DEFAULT_NUM_CAPTIONS, label="Number of Captions to Generate"
    ),
]

# The set of output components is fixed when the interface is built, so it
# cannot follow the number of captions requested at run time. A single
# textbox holding one caption per line works for any count.
outputs = gr.outputs.Textbox(label="Captions")

title = "Git-Base-COCO Image Captioning"
description = "A model for generating captions for images."

gr.Interface(
    fn=_captions_as_text,
    inputs=inputs,
    outputs=outputs,
    title=title,
    description=description,
    enable_queue=True,
).launch(debug=True)