Spaces:

AINxtGen
/

Flux_LoRA_Trainning_on_Modal

Running

App Files Files Community

Flux_LoRA_Trainning_on_Modal / toolkit /image_utils.py

AINxtGen

Init

c22961b 2 days ago

raw

history blame contribute delete

15.9 kB

	# ref https://github.com/scardine/image_size/blob/master/get_image_size.py
	import atexit
	import collections
	import json
	import os
	import io
	import struct
	import threading
	from typing import TYPE_CHECKING

	import cv2
	import numpy as np
	import torch
	from diffusers import AutoencoderTiny

	FILE_UNKNOWN = "Sorry, don't know how to get size for this file."


	class UnknownImageFormat(Exception):
	pass


	types = collections.OrderedDict()
	BMP = types['BMP'] = 'BMP'
	GIF = types['GIF'] = 'GIF'
	ICO = types['ICO'] = 'ICO'
	JPEG = types['JPEG'] = 'JPEG'
	PNG = types['PNG'] = 'PNG'
	TIFF = types['TIFF'] = 'TIFF'

	image_fields = ['path', 'type', 'file_size', 'width', 'height']


	class Image(collections.namedtuple('Image', image_fields)):

	def to_str_row(self):
	return ("%d\t%d\t%d\t%s\t%s" % (
	self.width,
	self.height,
	self.file_size,
	self.type,
	self.path.replace('\t', '\\t'),
	))

	def to_str_row_verbose(self):
	return ("%d\t%d\t%d\t%s\t%s\t##%s" % (
	self.width,
	self.height,
	self.file_size,
	self.type,
	self.path.replace('\t', '\\t'),
	self))

	def to_str_json(self, indent=None):
	return json.dumps(self._asdict(), indent=indent)


	def get_image_size(file_path):
	"""
	Return (width, height) for a given img file content - no external
	dependencies except the os and struct builtin modules
	"""
	img = get_image_metadata(file_path)
	return (img.width, img.height)


	def get_image_size_from_bytesio(input, size):
	"""
	Return (width, height) for a given img file content - no external
	dependencies except the os and struct builtin modules

	Args:
	input (io.IOBase): io object support read & seek
	size (int): size of buffer in byte
	"""
	img = get_image_metadata_from_bytesio(input, size)
	return (img.width, img.height)


	def get_image_metadata(file_path):
	"""
	Return an `Image` object for a given img file content - no external
	dependencies except the os and struct builtin modules

	Args:
	file_path (str): path to an image file

	Returns:
	Image: (path, type, file_size, width, height)
	"""
	size = os.path.getsize(file_path)

	# be explicit with open arguments - we need binary mode
	with io.open(file_path, "rb") as input:
	return get_image_metadata_from_bytesio(input, size, file_path)


	def get_image_metadata_from_bytesio(input, size, file_path=None):
	"""
	Return an `Image` object for a given img file content - no external
	dependencies except the os and struct builtin modules

	Args:
	input (io.IOBase): io object support read & seek
	size (int): size of buffer in byte
	file_path (str): path to an image file

	Returns:
	Image: (path, type, file_size, width, height)
	"""
	height = -1
	width = -1
	data = input.read(26)
	msg = " raised while trying to decode as JPEG."

	if (size >= 10) and data[:6] in (b'GIF87a', b'GIF89a'):
	# GIFs
	imgtype = GIF
	w, h = struct.unpack("<HH", data[6:10])
	width = int(w)
	height = int(h)
	elif ((size >= 24) and data.startswith(b'\211PNG\r\n\032\n')
	and (data[12:16] == b'IHDR')):
	# PNGs
	imgtype = PNG
	w, h = struct.unpack(">LL", data[16:24])
	width = int(w)
	height = int(h)
	elif (size >= 16) and data.startswith(b'\211PNG\r\n\032\n'):
	# older PNGs
	imgtype = PNG
	w, h = struct.unpack(">LL", data[8:16])
	width = int(w)
	height = int(h)
	elif (size >= 2) and data.startswith(b'\377\330'):
	# JPEG
	imgtype = JPEG
	input.seek(0)
	input.read(2)
	b = input.read(1)
	try:
	while (b and ord(b) != 0xDA):
	while (ord(b) != 0xFF):
	b = input.read(1)
	while (ord(b) == 0xFF):
	b = input.read(1)
	if (ord(b) >= 0xC0 and ord(b) <= 0xC3):
	input.read(3)
	h, w = struct.unpack(">HH", input.read(4))
	break
	else:
	input.read(
	int(struct.unpack(">H", input.read(2))[0]) - 2)
	b = input.read(1)
	width = int(w)
	height = int(h)
	except struct.error:
	raise UnknownImageFormat("StructError" + msg)
	except ValueError:
	raise UnknownImageFormat("ValueError" + msg)
	except Exception as e:
	raise UnknownImageFormat(e.__class__.__name__ + msg)
	elif (size >= 26) and data.startswith(b'BM'):
	# BMP
	imgtype = 'BMP'
	headersize = struct.unpack("<I", data[14:18])[0]
	if headersize == 12:
	w, h = struct.unpack("<HH", data[18:22])
	width = int(w)
	height = int(h)
	elif headersize >= 40:
	w, h = struct.unpack("<ii", data[18:26])
	width = int(w)
	# as h is negative when stored upside down
	height = abs(int(h))
	else:
	raise UnknownImageFormat(
	"Unkown DIB header size:" +
	str(headersize))
	elif (size >= 8) and data[:4] in (b"II\052\000", b"MM\000\052"):
	# Standard TIFF, big- or little-endian
	# BigTIFF and other different but TIFF-like formats are not
	# supported currently
	imgtype = TIFF
	byteOrder = data[:2]
	boChar = ">" if byteOrder == "MM" else "<"
	# maps TIFF type id to size (in bytes)
	# and python format char for struct
	tiffTypes = {
	1: (1, boChar + "B"), # BYTE
	2: (1, boChar + "c"), # ASCII
	3: (2, boChar + "H"), # SHORT
	4: (4, boChar + "L"), # LONG
	5: (8, boChar + "LL"), # RATIONAL
	6: (1, boChar + "b"), # SBYTE
	7: (1, boChar + "c"), # UNDEFINED
	8: (2, boChar + "h"), # SSHORT
	9: (4, boChar + "l"), # SLONG
	10: (8, boChar + "ll"), # SRATIONAL
	11: (4, boChar + "f"), # FLOAT
	12: (8, boChar + "d") # DOUBLE
	}
	ifdOffset = struct.unpack(boChar + "L", data[4:8])[0]
	try:
	countSize = 2
	input.seek(ifdOffset)
	ec = input.read(countSize)
	ifdEntryCount = struct.unpack(boChar + "H", ec)[0]
	# 2 bytes: TagId + 2 bytes: type + 4 bytes: count of values + 4
	# bytes: value offset
	ifdEntrySize = 12
	for i in range(ifdEntryCount):
	entryOffset = ifdOffset + countSize + i * ifdEntrySize
	input.seek(entryOffset)
	tag = input.read(2)
	tag = struct.unpack(boChar + "H", tag)[0]
	if (tag == 256 or tag == 257):
	# if type indicates that value fits into 4 bytes, value
	# offset is not an offset but value itself
	type = input.read(2)
	type = struct.unpack(boChar + "H", type)[0]
	if type not in tiffTypes:
	raise UnknownImageFormat(
	"Unkown TIFF field type:" +
	str(type))
	typeSize = tiffTypes[type][0]
	typeChar = tiffTypes[type][1]
	input.seek(entryOffset + 8)
	value = input.read(typeSize)
	value = int(struct.unpack(typeChar, value)[0])
	if tag == 256:
	width = value
	else:
	height = value
	if width > -1 and height > -1:
	break
	except Exception as e:
	raise UnknownImageFormat(str(e))
	elif size >= 2:
	# see http://en.wikipedia.org/wiki/ICO_(file_format)
	imgtype = 'ICO'
	input.seek(0)
	reserved = input.read(2)
	if 0 != struct.unpack("<H", reserved)[0]:
	raise UnknownImageFormat(FILE_UNKNOWN)
	format = input.read(2)
	assert 1 == struct.unpack("<H", format)[0]
	num = input.read(2)
	num = struct.unpack("<H", num)[0]
	if num > 1:
	import warnings
	warnings.warn("ICO File contains more than one image")
	# http://msdn.microsoft.com/en-us/library/ms997538.aspx
	w = input.read(1)
	h = input.read(1)
	width = ord(w)
	height = ord(h)
	else:
	raise UnknownImageFormat(FILE_UNKNOWN)

	return Image(path=file_path,
	type=imgtype,
	file_size=size,
	width=width,
	height=height)


	import unittest


	class Test_get_image_size(unittest.TestCase):
	data = [{
	'path': 'lookmanodeps.png',
	'width': 251,
	'height': 208,
	'file_size': 22228,
	'type': 'PNG'}]

	def setUp(self):
	pass

	def test_get_image_size_from_bytesio(self):
	img = self.data[0]
	p = img['path']
	with io.open(p, 'rb') as fp:
	b = fp.read()
	fp = io.BytesIO(b)
	sz = len(b)
	output = get_image_size_from_bytesio(fp, sz)
	self.assertTrue(output)
	self.assertEqual(output,
	(img['width'],
	img['height']))

	def test_get_image_metadata_from_bytesio(self):
	img = self.data[0]
	p = img['path']
	with io.open(p, 'rb') as fp:
	b = fp.read()
	fp = io.BytesIO(b)
	sz = len(b)
	output = get_image_metadata_from_bytesio(fp, sz)
	self.assertTrue(output)
	for field in image_fields:
	self.assertEqual(getattr(output, field), None if field == 'path' else img[field])

	def test_get_image_metadata(self):
	img = self.data[0]
	output = get_image_metadata(img['path'])
	self.assertTrue(output)
	for field in image_fields:
	self.assertEqual(getattr(output, field), img[field])

	def test_get_image_metadata__ENOENT_OSError(self):
	with self.assertRaises(OSError):
	get_image_metadata('THIS_DOES_NOT_EXIST')

	def test_get_image_metadata__not_an_image_UnknownImageFormat(self):
	with self.assertRaises(UnknownImageFormat):
	get_image_metadata('README.rst')

	def test_get_image_size(self):
	img = self.data[0]
	output = get_image_size(img['path'])
	self.assertTrue(output)
	self.assertEqual(output,
	(img['width'],
	img['height']))

	def tearDown(self):
	pass


	def main(argv=None):
	"""
	Print image metadata fields for the given file path.

	Keyword Arguments:
	argv (list): commandline arguments (e.g. sys.argv[1:])
	Returns:
	int: zero for OK
	"""
	import logging
	import optparse
	import sys

	prs = optparse.OptionParser(
	usage="%prog [-v\|--verbose] [--json\|--json-indent] <path0> [<pathN>]",
	description="Print metadata for the given image paths "
	"(without image library bindings).")

	prs.add_option('--json',
	dest='json',
	action='store_true')
	prs.add_option('--json-indent',
	dest='json_indent',
	action='store_true')

	prs.add_option('-v', '--verbose',
	dest='verbose',
	action='store_true', )
	prs.add_option('-q', '--quiet',
	dest='quiet',
	action='store_true', )
	prs.add_option('-t', '--test',
	dest='run_tests',
	action='store_true', )

	argv = list(argv) if argv is not None else sys.argv[1:]
	(opts, args) = prs.parse_args(args=argv)
	loglevel = logging.INFO
	if opts.verbose:
	loglevel = logging.DEBUG
	elif opts.quiet:
	loglevel = logging.ERROR
	logging.basicConfig(level=loglevel)
	log = logging.getLogger()
	log.debug('argv: %r', argv)
	log.debug('opts: %r', opts)
	log.debug('args: %r', args)

	if opts.run_tests:
	import sys
	sys.argv = [sys.argv[0]] + args
	import unittest
	return unittest.main()

	output_func = Image.to_str_row
	if opts.json_indent:
	import functools
	output_func = functools.partial(Image.to_str_json, indent=2)
	elif opts.json:
	output_func = Image.to_str_json
	elif opts.verbose:
	output_func = Image.to_str_row_verbose

	EX_OK = 0
	EX_NOT_OK = 2

	if len(args) < 1:
	prs.print_help()
	print('')
	prs.error("You must specify one or more paths to image files")

	errors = []
	for path_arg in args:
	try:
	img = get_image_metadata(path_arg)
	print(output_func(img))
	except KeyboardInterrupt:
	raise
	except OSError as e:
	log.error((path_arg, e))
	errors.append((path_arg, e))
	except Exception as e:
	log.exception(e)
	errors.append((path_arg, e))
	pass
	if len(errors):
	import pprint
	print("ERRORS", file=sys.stderr)
	print("======", file=sys.stderr)
	print(pprint.pformat(errors, indent=2), file=sys.stderr)
	return EX_NOT_OK
	return EX_OK


	is_window_shown = False
	display_lock = threading.Lock()
	current_img = None
	update_event = threading.Event()

	def update_image(img, name):
	global current_img
	with display_lock:
	current_img = (img, name)
	update_event.set()

	def display_image_in_thread():
	global is_window_shown

	def display_img():
	global current_img
	while True:
	update_event.wait()
	with display_lock:
	if current_img:
	img, name = current_img
	cv2.imshow(name, img)
	current_img = None
	update_event.clear()
	if cv2.waitKey(1) & 0xFF == 27: # Esc key to stop
	cv2.destroyAllWindows()
	print('\nESC pressed, stopping')
	break

	if not is_window_shown:
	is_window_shown = True
	threading.Thread(target=display_img, daemon=True).start()


	def show_img(img, name='AI Toolkit'):
	img = np.clip(img, 0, 255).astype(np.uint8)
	update_image(img[:, :, ::-1], name)
	if not is_window_shown:
	display_image_in_thread()


	def show_tensors(imgs: torch.Tensor, name='AI Toolkit'):
	if len(imgs.shape) == 4:
	img_list = torch.chunk(imgs, imgs.shape[0], dim=0)
	else:
	img_list = [imgs]

	img = torch.cat(img_list, dim=3)
	img = img / 2 + 0.5
	img_numpy = img.to(torch.float32).detach().cpu().numpy()
	img_numpy = np.clip(img_numpy, 0, 1) * 255
	img_numpy = img_numpy.transpose(0, 2, 3, 1)
	img_numpy = img_numpy.astype(np.uint8)

	show_img(img_numpy[0], name=name)


	def show_latents(latents: torch.Tensor, vae: 'AutoencoderTiny', name='AI Toolkit'):
	if vae.device == 'cpu':
	vae.to(latents.device)
	latents = latents / vae.config['scaling_factor']
	imgs = vae.decode(latents).sample
	show_tensors(imgs, name=name)


	def on_exit():
	if is_window_shown:
	cv2.destroyAllWindows()


	def reduce_contrast(tensor, factor):
	# Ensure factor is between 0 and 1
	factor = max(0, min(factor, 1))

	# Calculate the mean of the tensor
	mean = torch.mean(tensor)

	# Reduce contrast
	adjusted_tensor = (tensor - mean) * factor + mean

	# Clip values to ensure they stay within -1 to 1 range
	return torch.clamp(adjusted_tensor, -1.0, 1.0)

	atexit.register(on_exit)

	if __name__ == "__main__":
	import sys

	sys.exit(main(argv=sys.argv[1:]))