toto10
/

extensions

Model card Files Files and versions Community

extensions / addtional /scripts /safetensors_hack.py

toto10

Upload folder using huggingface_hub (#1)

34097e9 over 1 year ago

raw

history blame

4.42 kB

	import io
	import os
	import mmap
	import torch
	import json
	import hashlib
	import safetensors
	import safetensors.torch

	from modules import sd_models

	# PyTorch 1.13 renamed `_UntypedStorage` to `UntypedStorage`; bind whichever
	# of the two names this torch build provides.
	try:
	    UntypedStorage = torch.storage.UntypedStorage
	except AttributeError:
	    UntypedStorage = torch.storage._UntypedStorage

	def read_metadata(filename):
	    """Read the JSON metadata from a .safetensors file.

	    The file is expected to start with an 8-byte little-endian integer giving
	    the length of the JSON header that immediately follows it. Only that
	    header is read; the tensor data after it is never touched.

	    Args:
	        filename: Path of the .safetensors file.

	    Returns:
	        The dict stored under the header's "__metadata__" key, or an empty
	        dict when the header has no such key.

	    Raises:
	        OSError: If the file cannot be opened or read.
	        ValueError: If the file is too short to contain the header it
	            announces, or the header is not valid JSON
	            (json.JSONDecodeError is a ValueError subclass).
	    """
	    # The file is binary, so open it as such. Plain reads also avoid mapping
	    # the whole (potentially multi-gigabyte) file just to fetch the header,
	    # and work on empty files where mmap would refuse to map.
	    with open(filename, mode="rb") as file_obj:
	        file_size = os.fstat(file_obj.fileno()).st_size
	        prefix = file_obj.read(8)
	        if len(prefix) != 8:
	            raise ValueError(f"{filename}: file is too short to contain a safetensors header length")

	        header_len = int.from_bytes(prefix, "little")
	        # Check against the real file size first so a corrupt length prefix
	        # cannot trigger an enormous read request.
	        if header_len > file_size - 8:
	            raise ValueError(f"{filename}: header length {header_len} exceeds the file size")

	        header_bytes = file_obj.read(header_len)

	    if len(header_bytes) != header_len:
	        raise ValueError(f"{filename}: safetensors header is truncated")

	    header = json.loads(header_bytes)
	    return header.get("__metadata__", {})


	def load_file(filename, device):
	    """Load a .safetensors file without a memory mapping that locks the model file.

	    Works around safetensors issue: https://github.com/huggingface/safetensors/issues/164

	    The JSON header is read with ordinary binary reads. The tensor bytes are
	    then exposed through `UntypedStorage.from_file`, and every tensor is
	    built by `create_tensor` from its recorded dtype, shape and data offsets.

	    Args:
	        filename: Path of the .safetensors file.
	        device: Device the returned tensors are moved to (anything accepted
	            by `Tensor.to`). Pass None to leave them where `create_tensor`
	            produced them.

	    Returns:
	        A `(tensors, metadata)` tuple: `tensors` maps each tensor name to its
	        tensor, and `metadata` is the header's "__metadata__" dict (empty
	        when the header has none).

	    Raises:
	        OSError: If the file cannot be opened or read.
	        ValueError: If the file is too short to contain the header it
	            announces, or the header is not valid JSON.
	    """
	    with open(filename, mode="rb") as file_obj:
	        size = os.fstat(file_obj.fileno()).st_size
	        prefix = file_obj.read(8)
	        if len(prefix) != 8:
	            raise ValueError(f"{filename}: file is too short to contain a safetensors header length")

	        n = int.from_bytes(prefix, "little")
	        # Reject a corrupt length prefix before asking for that many bytes.
	        if n > size - 8:
	            raise ValueError(f"{filename}: header length {n} exceeds the file size")

	        metadata = json.loads(file_obj.read(n))

	    storage = UntypedStorage.from_file(filename, False, size)
	    # Tensor data offsets in the header are relative to the end of the header.
	    offset = n + 8
	    md = metadata.get("__metadata__", {})

	    tensors = {}
	    for name, info in metadata.items():
	        if name == "__metadata__":
	            continue
	        tensor = create_tensor(storage, info, offset)
	        # Honour the requested device; previously the argument was ignored.
	        tensors[name] = tensor if device is None else tensor.to(device)
	    return tensors, md


	def hash_file(filename):
	    """Hash a .safetensors file using the new hashing method.

	    Only the bytes after the JSON header (the tensor data) are hashed, so the
	    result does not change when the header is edited.

	    Args:
	        filename: Path of the .safetensors file.

	    Returns:
	        The SHA-256 hex digest of everything after the header.

	    Raises:
	        OSError: If the file cannot be opened or read.
	        ValueError: If the file is shorter than the 8-byte header length
	            prefix.
	    """
	    hash_sha256 = hashlib.sha256()
	    blksize = 1024 * 1024

	    # One binary open is enough: read the length prefix, skip the header,
	    # then stream the remainder through the hash in fixed-size blocks.
	    with open(filename, mode="rb") as file_obj:
	        prefix = file_obj.read(8)
	        if len(prefix) != 8:
	            raise ValueError(f"{filename}: file is too short to contain a safetensors header length")

	        n = int.from_bytes(prefix, "little")
	        file_obj.seek(n + 8)
	        for chunk in iter(lambda: file_obj.read(blksize), b""):
	            hash_sha256.update(chunk)

	    return hash_sha256.hexdigest()


	def legacy_hash_file(filename):
	    """Hash a model file the way the legacy `sd_models.model_hash()` method would.

	    Returns the value of `sd_models.model_hash(filename)` when the file holds
	    only "ss_"-prefixed metadata, otherwise the first 8 hex digits of a
	    SHA-256 computed over a re-serialized copy with the other keys removed.
	    """
	    has_user_metadata = any(not key.startswith("ss_") for key in read_metadata(filename))

	    if not has_user_metadata:
	        # Files from the legacy-hash era predate the user metadata system, so
	        # model_hash can be applied to them directly.
	        return sd_models.model_hash(filename)

	    # Compatibility path: the legacy hash must not depend on user-editable
	    # metadata (name/description/etc.), or it would change on every edit.
	    # Keep only the immutable "ss_" training parameters, re-serialize the
	    # model as if sd-scripts had just written it, and hash the same byte
	    # window model_hash uses. (The new hashing method avoids all of this by
	    # hashing only the tensor region of the file.)
	    tensors, stored = load_file(filename, "cpu")
	    training_only = {key: value for key, value in stored.items() if key.startswith("ss_")}
	    model_bytes = safetensors.torch.save(tensors, training_only)

	    digest = hashlib.sha256(model_bytes[0x100000:0x110000])
	    return digest.hexdigest()[0:8]


	# Maps the dtype codes used in a .safetensors header to torch dtypes.
	DTYPES = {
	    "F64": torch.float64,
	    "F32": torch.float32,
	    "F16": torch.float16,
	    "BF16": torch.bfloat16,
	    "I64": torch.int64,
	    "I32": torch.int32,
	    "I16": torch.int16,
	    "I8": torch.int8,
	    "U8": torch.uint8,
	    "BOOL": torch.bool,
	}

	# The wider unsigned integer dtypes are not defined by every PyTorch release,
	# so each one is registered only when this torch build actually provides it.
	DTYPES.update({
	    code: getattr(torch, attr)
	    for code, attr in (("U64", "uint64"), ("U32", "uint32"), ("U16", "uint16"))
	    if hasattr(torch, attr)
	})


	def create_tensor(storage, info, offset):
	    """Build one tensor from `storage` without holding on to an open handle
	    to the parent model file.

	    `info` supplies the "dtype" code (looked up in DTYPES), the "shape" and
	    the "data_offsets" pair; `offset` is added to both data offsets before
	    slicing `storage`.
	    """
	    element_type = DTYPES[info["dtype"]]
	    dims = info["shape"]
	    begin, end = info["data_offsets"]

	    # Take the tensor's bytes, reinterpret them as the target dtype and shape.
	    raw_bytes = torch.asarray(storage[begin + offset : end + offset], dtype=torch.uint8)
	    shaped = raw_bytes.view(dtype=element_type).reshape(dims)

	    # clone() copies the data so the result does not alias `storage`.
	    return shaped.clone().detach()