import gradio as gr
from PIL import Image

from model import GitBaseCocoModel


def generate_captions(
	image: Image.Image,
	max_len: int = 50,
	num_captions: int = 1,
	) -> str:
	"""
	Generates captions for the given image.
	
	-----
	Parameters:
	image: PIL.Image.Image
		The image to generate captions for.
	max_len: int
		The maximum length of the caption.
	num_captions: int
		The number of captions to generate.

	-----
	Returns:
	str
		The generated captions joined into a single string, one caption
		per line.
	"""

	# Gradio's Number component hands values over as floats (e.g. 50.0),
	# so coerce them to the integers this function is declared to take
	# before passing them to the model.
	max_len = int(max_len)
	num_captions = int(num_captions)

	# Choose the device from what is actually available at runtime.
	# torch is imported lazily so that importing this module does not
	# require it; without torch we fall back to the CPU.
	try:
		import torch
		device = "cuda" if torch.cuda.is_available() else "cpu"
	except ImportError:
		device = "cpu"
	checkpoint = "microsoft/git-base-coco"

	# NOTE(review): a new model object is built on every call, which
	# presumably reloads the checkpoint each time - consider building it
	# once and reusing it if GitBaseCocoModel is safe to share.
	model = GitBaseCocoModel(device, checkpoint)

	captions = model.generate(image, max_len, num_captions)
	# The output component is a single textbox, so merge the captions
	# into one newline-separated string.
	return "\n".join(captions)


# Static text passed to the interface as its title and description.
title = "Git-Base-COCO Image Captioning"
description = "A model for generating captions for images."

# Input widgets, listed in the same order as the parameters of
# `generate_captions`: the image, the maximum caption length and the
# number of captions.
inputs = [
	gr.inputs.Image(type="pil", label="Image"),
	gr.inputs.Number(default=50, label="Maximum Caption Length"),
	gr.inputs.Number(default=1, label="Number of Captions to Generate"),
]

# A single textbox displays whatever `generate_captions` returns.
outputs = gr.outputs.Textbox(label="Captions")

# Tie the captioning function, its widgets and the page text together.
interface = gr.Interface(
	title=title,
	description=description,
	fn=generate_captions,
	inputs=inputs,
	outputs=outputs,
)


if __name__ == "__main__":
	# Launch the interface only when this file is executed as a script,
	# not when it is imported.
	launch_options = {"enable_queue": True}
	interface.launch(**launch_options)