super-fast-lcm-lora-sd1.5

Running

App Files Files Community

super-fast-lcm-lora-sd1.5 / app.py

multimodalart HF staff

Update app.py

12fd800 over 1 year ago

raw

history blame

5.41 kB

	from diffusers import DiffusionPipeline, LCMScheduler, AutoencoderTiny
	from compel import Compel, ReturnedEmbeddingsType
	import torch
	import os

	# Optional Intel accelerator support. The import is best-effort: when the
	# extension is missing or fails to initialise, the app continues without it.
	# `except Exception` (rather than a bare `except:`) keeps that best-effort
	# behaviour while letting KeyboardInterrupt / SystemExit propagate.
	try:
	    import intel_extension_for_pytorch as ipex
	except Exception:
	    # Bind the name so later code can test `ipex is None` safely.
	    ipex = None

	from PIL import Image
	import numpy as np
	import gradio as gr
	import psutil


	# Runtime switches, read once from the environment at import time.
	# All three are raw strings (or None when unset).
	SAFETY_CHECKER = os.environ.get("SAFETY_CHECKER", None)
	TORCH_COMPILE = os.environ.get("TORCH_COMPILE", None)
	HF_TOKEN = os.environ.get("HF_TOKEN", None)

	# Backend probes. The hasattr guards avoid an AttributeError on torch builds
	# that do not expose `torch.backends.mps` / `torch.xpu`.
	mps_available = hasattr(torch.backends, "mps") and torch.backends.mps.is_available()
	xpu_available = hasattr(torch, "xpu") and torch.xpu.is_available()

	# `device` is where inference runs; `torch_device` is where the weights are
	# placed first (the pipeline is moved to `torch_device`, then to `device`).
	if mps_available:
	    # MPS (macOS on Apple Silicon) takes precedence: weights are staged on
	    # the CPU and only then moved to the MPS device.
	    device = torch.device("mps")
	    torch_device = "cpu"
	else:
	    device = torch.device(
	        "cuda" if torch.cuda.is_available() else "xpu" if xpu_available else "cpu"
	    )
	    torch_device = device

	# Half precision only on CUDA / XPU. CPU and MPS run in float32: float16
	# pipelines are not expected to run on a plain CPU device.
	torch_dtype = torch.float16 if device.type in ("cuda", "xpu") else torch.float32

	# Log after the final device has been decided so the output is accurate.
	print(f"SAFETY_CHECKER: {SAFETY_CHECKER}")
	print(f"TORCH_COMPILE: {TORCH_COMPILE}")
	print(f"device: {device}")

	# Base checkpoint that the LCM LoRA is applied on top of.
	model_id = "stabilityai/stable-diffusion-xl-base-1.0"

	# The safety checker is only kept when SAFETY_CHECKER is exactly "True";
	# otherwise `safety_checker=None` is passed to from_pretrained.
	if SAFETY_CHECKER == "True":
	    pipe = DiffusionPipeline.from_pretrained(model_id)
	else:
	    pipe = DiffusionPipeline.from_pretrained(model_id, safety_checker=None)

	# Swap in the LCM scheduler, reusing the existing scheduler's config.
	pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config)
	# Two-stage move: cast/place on `torch_device` first, then move to `device`
	# (the two differ only on MPS, where weights are staged on the CPU).
	pipe.to(device=torch_device, dtype=torch_dtype).to(device)
	pipe.unet.to(memory_format=torch.channels_last)

	# Enable attention slicing when the machine has less than 64 GiB of RAM
	# (total memory as reported by psutil).
	if psutil.virtual_memory().total < 64 * 1024**3:
	    pipe.enable_attention_slicing()

	# TORCH_COMPILE is a raw environment string, so a plain truthiness test would
	# treat "False" or "0" as "enabled". Explicit off-values are rejected here;
	# any other non-empty value still enables compilation, as before.
	if TORCH_COMPILE and TORCH_COMPILE.strip().lower() not in ("", "0", "false", "no", "off"):
	    pipe.unet = torch.compile(pipe.unet, mode="reduce-overhead", fullgraph=True)
	    pipe.vae = torch.compile(pipe.vae, mode="reduce-overhead", fullgraph=True)

	    # One throw-away single-step call so the compiled modules are exercised
	    # at startup rather than on the first user request.
	    pipe(prompt="warmup", num_inference_steps=1, guidance_scale=8.0)

	# Attach the LCM LoRA weights to the pipeline. HF_TOKEN (possibly None) is
	# forwarded as the Hub auth token.
	pipe.load_lora_weights(
	    "lcm-sd/lcm-sdxl-lora",
	    weight_name="lcm_sdxl_lora.safetensors",
	    # adapter_name="lcm",  # only needed when mixing adapters (see below)
	    use_auth_token=HF_TOKEN,
	)

	# Optional second LoRA ("papercut"), currently disabled. To mix both, give
	# the LCM LoRA above an adapter name and enable the lines below.
	# pipe.load_lora_weights(
	#     "TheLastBen/Papercut_SDXL",
	#     weight_name="papercut.safetensors",
	#     adapter_name="papercut",
	# )
	# pipe.set_adapters(["lcm", "papercut"], adapter_weights=[1.0, 0.8])

	# Prompt-to-embedding helper wired to both of the pipeline's
	# tokenizer / text-encoder pairs. Only the second encoder is asked for a
	# pooled output (requires_pooled=[False, True]).
	compel_proc = Compel(
	    returned_embeddings_type=ReturnedEmbeddingsType.PENULTIMATE_HIDDEN_STATES_NON_NORMALIZED,
	    requires_pooled=[False, True],
	    text_encoder=[pipe.text_encoder, pipe.text_encoder_2],
	    tokenizer=[pipe.tokenizer, pipe.tokenizer_2],
	)


	def predict(
	    prompt, guidance, steps, seed=1231231, progress=gr.Progress(track_tqdm=True)
	):
	    """Generate a single 1024x1024 image for *prompt*.

	    Args:
	        prompt: Text prompt; converted to (pooled) embeddings by ``compel_proc``.
	        guidance: Value forwarded as ``guidance_scale``.
	        steps: Number of inference steps; coerced to ``int``.
	        seed: RNG seed; coerced to ``int``.
	        progress: Not referenced in the body; declared so Gradio can attach
	            its progress tracker (``track_tqdm=True``).

	    Returns:
	        The first image of the pipeline result (``output_type="pil"``).

	    Raises:
	        gr.Error: If the pipeline result flags the image as NSFW.
	    """
	    # Sliders and API clients may deliver whole numbers as floats;
	    # torch.manual_seed and the step count both need real integers.
	    seed = int(seed)
	    steps = int(steps)

	    generator = torch.manual_seed(seed)
	    prompt_embeds, pooled_prompt_embeds = compel_proc(prompt)

	    results = pipe(
	        prompt_embeds=prompt_embeds,
	        pooled_prompt_embeds=pooled_prompt_embeds,
	        generator=generator,
	        num_inference_steps=steps,
	        guidance_scale=guidance,
	        width=1024,
	        height=1024,
	        # original_inference_steps=params.lcm_steps,
	        output_type="pil",
	    )

	    # The NSFW field is only consulted when the result actually carries it.
	    if "nsfw_content_detected" in results and results.nsfw_content_detected[0]:
	        raise gr.Error("NSFW content detected.")
	    return results.images[0]


	# Page-level CSS: centres the main column and the intro text.
	css = """
	#container{
	margin: 0 auto;
	max-width: 50rem;
	}
	#intro{
	max-width: 100%;
	text-align: center;
	margin: 0 auto;
	}
	"""

	# Markdown shown at the top of the page.
	_INTRO_MD = """# Ultra-Fast SDXL with Latent Consistency LoRA
	In this Space, SDXL is loaded with a latent consistency LoRA, giving it the super power of doing inference in as little as 4 steps. [Learn more on our blog](#) or [technical report](#).
	"""

	# Markdown snippet showing how to reproduce the setup with `diffusers`.
	_USAGE_MD = '''## Using it with `diffusers`
	```py
	from diffusers import DiffusionPipeline, LCMScheduler
	pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0").to("cuda")
	pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config)
	pipe.load_lora_weights("lcm-sd/lcm-sdxl-lora")

	results = pipe(
	prompt="The spirit of a tamagotchi wandering in the city of Vienna",
	num_inference_steps=4,
	guidance_scale=0.5,
	)
	results.images[0]
	```
	'''

	# UI layout: intro, prompt row with button, output image, advanced sliders,
	# usage notes; the button click is wired to `predict`.
	with gr.Blocks(css=css) as demo:
	    with gr.Column(elem_id="container"):
	        gr.Markdown(_INTRO_MD, elem_id="intro")

	        with gr.Row():
	            with gr.Row():
	                prompt = gr.Textbox(
	                    placeholder="Insert your prompt here:",
	                    value="papercut style of a cute monster",
	                    scale=5,
	                    container=False,
	                )
	                generate_bt = gr.Button("Generate", scale=1)

	        image = gr.Image(type="filepath")

	        with gr.Accordion("Advanced options", open=False):
	            guidance = gr.Slider(
	                label="Guidance", minimum=0.0, maximum=5, value=0.3, step=0.001
	            )
	            steps = gr.Slider(label="Steps", value=4, minimum=2, maximum=10, step=1)
	            seed = gr.Slider(
	                randomize=True, minimum=0, maximum=12013012031030, label="Seed", step=1
	            )

	        with gr.Group():
	            gr.Markdown(_USAGE_MD)

	        # Component order must match predict's positional parameters.
	        generate_bt.click(
	            fn=predict, inputs=[prompt, guidance, steps, seed], outputs=image
	        )

	# Enable request queuing, then start the server.
	demo.queue()
	demo.launch()