Spaces:

thorfinn0330
/

DiffusionModel

Build error

App Files Files Community

DiffusionModel / library /blip_caption_gui.py

thorfinn0330

Upload folder using huggingface_hub

11c2c17 verified 5 months ago

raw

history blame contribute delete

4.4 kB

	import gradio as gr
	from easygui import msgbox
	import subprocess
	import os
	from .common_gui import get_folder_path, add_pre_postfix
	from library.custom_logging import setup_logging

	# Module-wide logger shared by the functions in this file
	log = setup_logging()

	# Interpreter used to launch the captioning script: the system python3 on
	# POSIX, otherwise the interpreter inside the local ./venv folder.
	if os.name == 'posix':
	    PYTHON = 'python3'
	else:
	    PYTHON = './venv/Scripts/python.exe'


	def caption_images(
	    train_data_dir,
	    caption_file_ext,
	    batch_size,
	    num_beams,
	    top_p,
	    max_length,
	    min_length,
	    beam_search,
	    prefix,
	    postfix,
	):
	    """Run the BLIP captioning script on a folder, then add prefix/postfix.

	    Shows a message box and returns early when the image folder or the
	    caption extension is empty. Otherwise launches
	    ``finetune/make_captions.py`` with the ``PYTHON`` interpreter and, once
	    it finishes, calls ``add_pre_postfix`` on the same folder.

	    Args:
	        train_data_dir: Folder passed to the script as its positional
	            argument and to ``add_pre_postfix`` as ``folder``.
	        caption_file_ext: Value for ``--caption_extension``.
	        batch_size: Converted with ``int()`` for ``--batch_size``.
	        num_beams: Converted with ``int()`` for ``--num_beams``.
	        top_p: Passed unchanged as ``--top_p``.
	        max_length: Converted with ``int()`` for ``--max_length``.
	        min_length: Converted with ``int()`` for ``--min_length``.
	        beam_search: When truthy, ``--beam_search`` is added.
	        prefix: Forwarded to ``add_pre_postfix``.
	        postfix: Forwarded to ``add_pre_postfix``.

	    Returns:
	        None.
	    """
	    # Check if the image folder is provided
	    if train_data_dir == '':
	        msgbox('Image folder is missing...')
	        return

	    # Check if the caption file extension is provided
	    if caption_file_ext == '':
	        msgbox('Please provide an extension for the caption files.')
	        return

	    log.info(f'Captioning files in {train_data_dir}...')

	    # Build the command as an argument list instead of one shell string.
	    # The folder and extension come straight from GUI text boxes; passing
	    # them as separate argv entries means quotes, spaces, ';' or '$' in
	    # those values can neither break the command nor be run by a shell.
	    run_cmd = [
	        PYTHON,
	        'finetune/make_captions.py',
	        f'--batch_size={int(batch_size)}',
	        f'--num_beams={int(num_beams)}',
	        f'--top_p={top_p}',
	        f'--max_length={int(max_length)}',
	        f'--min_length={int(min_length)}',
	    ]
	    if beam_search:
	        run_cmd.append('--beam_search')
	    # The extension is known to be non-empty here (guarded above), so it
	    # is always passed.
	    run_cmd.append(f'--caption_extension={caption_file_ext}')
	    run_cmd.append(train_data_dir)
	    run_cmd.append(
	        '--caption_weights=https://storage.googleapis.com/sfr-vision-language-research/BLIP/models/model_large_caption.pth'
	    )

	    # Logged for information only; this joined form is not what is executed.
	    log.info(' '.join(run_cmd))

	    # Run the command the same way on every platform, without a shell.
	    # As before, the script's exit status is not inspected.
	    subprocess.run(run_cmd)

	    # Add prefix and postfix
	    add_pre_postfix(
	        folder=train_data_dir,
	        caption_file_ext=caption_file_ext,
	        prefix=prefix,
	        postfix=postfix,
	    )

	    log.info('...captioning done')


	###
	# Gradio UI
	###


	def gradio_blip_caption_gui_tab(headless=False):
	    """Create the 'BLIP Captioning' tab and hook its button to caption_images.

	    Args:
	        headless: When true, the folder-picker button is created with
	            ``visible=False``.

	    NOTE(review): the grouping of widgets into rows was reconstructed from
	    a source whose indentation had been flattened -- confirm against the
	    intended layout.
	    """
	    with gr.Tab('BLIP Captioning'):
	        gr.Markdown(
	            'This utility uses BLIP to caption files for each image in a folder.'
	        )

	        # Image folder text box with a button that fills it in through
	        # get_folder_path.
	        with gr.Row():
	            folder_box = gr.Textbox(
	                label='Image folder to caption',
	                placeholder='Directory containing the images to caption',
	                interactive=True,
	            )
	            browse_button = gr.Button(
	                '📂',
	                elem_id='open_folder_small',
	                visible=not headless,
	            )
	            browse_button.click(
	                get_folder_path,
	                outputs=folder_box,
	                show_progress=False,
	            )

	        # Caption file options and batch size.
	        with gr.Row():
	            extension_box = gr.Textbox(
	                label='Caption file extension',
	                placeholder='Extension for caption file, e.g., .caption, .txt',
	                value='.txt',
	                interactive=True,
	            )
	            prefix_box = gr.Textbox(
	                label='Prefix to add to BLIP caption',
	                placeholder='(Optional)',
	                interactive=True,
	            )
	            postfix_box = gr.Textbox(
	                label='Postfix to add to BLIP caption',
	                placeholder='(Optional)',
	                interactive=True,
	            )
	            batch_size_box = gr.Number(
	                value=1,
	                label='Batch size',
	                interactive=True,
	            )

	        # Text generation settings.
	        with gr.Row():
	            beam_search_box = gr.Checkbox(
	                label='Use beam search',
	                interactive=True,
	                value=True,
	            )
	            num_beams_box = gr.Number(
	                value=1,
	                label='Number of beams',
	                interactive=True,
	            )
	            top_p_box = gr.Number(
	                value=0.9,
	                label='Top p',
	                interactive=True,
	            )
	            max_length_box = gr.Number(
	                value=75,
	                label='Max length',
	                interactive=True,
	            )
	            min_length_box = gr.Number(
	                value=5,
	                label='Min length',
	                interactive=True,
	            )

	        run_button = gr.Button('Caption images')

	        # Order must match the positional parameters of caption_images.
	        caption_inputs = [
	            folder_box,
	            extension_box,
	            batch_size_box,
	            num_beams_box,
	            top_p_box,
	            max_length_box,
	            min_length_box,
	            beam_search_box,
	            prefix_box,
	            postfix_box,
	        ]
	        run_button.click(
	            caption_images,
	            inputs=caption_inputs,
	            show_progress=False,
	        )