Spaces:

joaomorossini
/

image_captioning_model_comparison

Runtime error

App Files Files Community

image_captioning_model_comparison / app.py

joaomorossini

Adjust formatting

c97211f about 1 year ago

raw

history blame contribute delete

3.23 kB

	# --- Project dependencies ---
	import os
	import io
	import base64
	import requests
	import json
	import gradio as gr
	from PIL import Image
	from dotenv import load_dotenv, find_dotenv


	# --- Load environment variables ---
	# Merge a local .env file (located via find_dotenv) into the process
	# environment before the API key is read.
	load_dotenv(find_dotenv())
	# Indexing os.environ (rather than .get) raises KeyError at import time when
	# HF_API_KEY is not set.
	hf_api_key = os.environ["HF_API_KEY"]


	# --- URLs and Endpoints ---
	# Prefix used to build the clickable model links shown next to each caption.
	hf_base_url = "https://huggingface.co/"
	# Prefix used to build the per-model inference request URL.
	hf_inference_base_url = "https://api-inference.huggingface.co/models/"

	# Model identifiers; each is appended to both base URLs above.
	endpoints = [
	    # BLIP
	    "Salesforce/blip-image-captioning-large",
	    "Salesforce/blip-image-captioning-base",
	    # ViT + GPT-2
	    "nlpconnect/vit-gpt2-image-captioning",
	    # GIT
	    "microsoft/git-base",
	    "microsoft/git-large-textcaps",
	    "microsoft/git-large-r-coco",
	]


	# --- Define helper functions ---


	# Image-to-text completion
	def get_completion(inputs, parameters=None, timeout=60):
	    """Send the same payload to every model in ``endpoints`` and collect the replies.

	    Args:
	        inputs: JSON-serializable value sent under the "inputs" key.
	        parameters: Optional value sent under the "parameters" key; the key
	            is left out of the request body when this is None.
	        timeout: Seconds to wait on each endpoint before treating the
	            request as failed. Without it a single stalled endpoint would
	            block the whole comparison indefinitely.

	    Returns:
	        dict keyed by endpoint name. Each value is the decoded JSON response
	        on success, or ``{"error": "<message>"}`` when the request failed,
	        returned an HTTP error status, or produced a body that is not JSON.
	    """
	    headers = {
	        "Authorization": f"Bearer {hf_api_key}",
	        "Content-Type": "application/json",
	    }
	    payload = {"inputs": inputs}
	    if parameters is not None:
	        payload["parameters"] = parameters
	    # The body is identical for every endpoint, so serialize it only once.
	    body = json.dumps(payload)

	    results = {}
	    for endpoint in endpoints:
	        try:
	            response = requests.post(
	                hf_inference_base_url + endpoint,
	                headers=headers,
	                data=body,
	                timeout=timeout,
	            )
	            response.raise_for_status()
	            results[endpoint] = response.json()
	        except (requests.exceptions.RequestException, ValueError) as e:
	            # ValueError covers an undecodable/non-JSON body; one failing
	            # model must not abort the requests to the remaining ones.
	            print(f"Request to {endpoint} failed: {e}")
	            results[endpoint] = {"error": str(e)}

	    return results


	# Format image as base64 string
	def image_to_base64_str(pil_image):
	    """Return *pil_image* encoded as PNG and then base64, as a text string.

	    The image is written to an in-memory buffer through its
	    ``save(fp, format="PNG")`` method; nothing touches the filesystem.
	    """
	    with io.BytesIO() as buffer:
	        pil_image.save(buffer, format="PNG")
	        png_bytes = buffer.getvalue()
	    encoded = base64.b64encode(png_bytes)
	    return encoded.decode("utf-8")


	# Define captioner function
	def _extract_caption(result):
	    """Return the caption text from one endpoint result, or an "Error - ..." note."""
	    if isinstance(result, dict) and "error" in result:
	        return f"Error - {result['error']}"
	    try:
	        # A successful reply is indexed as result[0]["generated_text"].
	        return result[0]["generated_text"]
	    except (IndexError, KeyError, TypeError):
	        # Empty list, missing key, or a payload that is not a list of dicts.
	        return f"Error - unexpected response: {result!r}"


	def captioner(image):
	    """Caption *image* with every configured model and format the results as Markdown.

	    Args:
	        image: Image object accepted by ``image_to_base64_str``, or None when
	            no image was supplied.

	    Returns:
	        A Markdown string with one entry per endpoint: a link to the model
	        page followed by its caption (or an "Error - ..." line), with entries
	        separated by blank lines. When *image* is None, a short prompt asking
	        for an upload is returned instead.
	    """
	    if image is None:
	        # Nothing to encode; return a hint instead of failing inside the
	        # base64 helper with an AttributeError.
	        return "Please upload an image."

	    base64_image = image_to_base64_str(image)
	    results = get_completion(base64_image)

	    captions = []
	    for endpoint, result in results.items():
	        # Plain Markdown link (no heading syntax) keeps the regular text size.
	        header = f"[{endpoint}]({hf_base_url+endpoint}):"
	        caption = _extract_caption(result)
	        captions.append(f"{header}\n{caption} \n\n")

	    # Join all entries into one Markdown string and trim the trailing blanks.
	    return "\n".join(captions).strip()


	# --- Launch the Gradio App ---
	# Components are created in the same order as before: image input first,
	# then the single Markdown pane that holds every model's caption.
	image_input = gr.Image(label="Upload image", type="pil")
	captions_output = gr.Markdown(label="Captions")

	# Sample images offered in the UI; paths are relative to the app directory.
	example_images = [
	    "example_1.jpg",
	    "example_2.jpg",
	    "example_3.jpg",
	    "example_4.png",
	    "example_5.png",
	]

	demo = gr.Interface(
	    fn=captioner,
	    inputs=[image_input],
	    outputs=captions_output,
	    title="Image Captioning Model Comparison",
	    description="Upload an image and see how different models describe it!",
	    allow_flagging="never",
	    examples=example_images,
	)

	demo.launch(share=True, debug=True)


	# --- Close all connections ---
	# Runs once launch() returns.
	gr.close_all()