SMarioMan's picture
More fixes
94d4d18
from diffusers import DiffusionPipeline, LCMScheduler, AutoencoderTiny
import torch
import os
# Best-effort import of Intel Extension for PyTorch: if the import fails for
# any reason the script carries on without it. The `ipex` name is not used
# elsewhere in the visible code, so the import matters only for its side
# effects.
# NOTE(review): the bare `except` also swallows SystemExit/KeyboardInterrupt
# raised during the import — confirm that breadth is intended before narrowing.
try:
    import intel_extension_for_pytorch as ipex
except:
    pass
from PIL import Image
import numpy as np
import gradio as gr
import psutil
import time
# Runtime configuration, read once from the environment at import time.
SAFETY_CHECKER = os.environ.get("SAFETY_CHECKER", None)
TORCH_COMPILE = os.environ.get("TORCH_COMPILE", None)
HF_TOKEN = os.environ.get("HF_TOKEN", None)

# Probe the optional backends. The hasattr() guards keep this working on torch
# builds that do not expose `torch.backends.mps` or `torch.xpu` at all.
# MPS is the Apple-silicon (M1/M2/M3) backend on macOS.
mps_available = hasattr(torch.backends, "mps") and torch.backends.mps.is_available()
xpu_available = hasattr(torch, "xpu") and torch.xpu.is_available()

# Preferred device order: CUDA, then XPU, then plain CPU.
device = torch.device(
    "cuda" if torch.cuda.is_available() else "xpu" if xpu_available else "cpu"
)
# `torch_device` is the device the weights are first moved/cast on; `device`
# is where inference finally runs. They differ only on MPS (see below).
torch_device = device
# Half precision is only used on an accelerator. On a CPU-only host the
# pipeline must stay in float32: float16 on CPU is slow and not supported by
# every operator.
torch_dtype = torch.float32 if device.type == "cpu" else torch.float16

if mps_available:
    # On MPS the weights are cast on the CPU in float32 first and only then
    # moved to the "mps" device.
    device = torch.device("mps")
    torch_device = "cpu"
    torch_dtype = torch.float32

# Log the effective settings only after the MPS override, so the reported
# device is the one that is actually used.
print(f"SAFETY_CHECKER: {SAFETY_CHECKER}")
print(f"TORCH_COMPILE: {TORCH_COMPILE}")
print(f"device: {device}")
# Build the text-to-image pipeline. The checkpoint's own safety checker is
# kept only when SAFETY_CHECKER is exactly the string "True"; otherwise it is
# disabled explicitly.
if SAFETY_CHECKER == "True":
    pipe = DiffusionPipeline.from_pretrained("Lykon/dreamshaper-7")
else:
    pipe = DiffusionPipeline.from_pretrained("Lykon/dreamshaper-7", safety_checker=None)

# Swap in the LCM scheduler, reusing the existing scheduler's configuration.
pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config)
# Cast/move on `torch_device` first, then move to the final `device`; the two
# differ only in the MPS case (cast on CPU, then move to "mps").
pipe.to(device=torch_device, dtype=torch_dtype).to(device)
pipe.unet.to(memory_format=torch.channels_last)
pipe.set_progress_bar_config(disable=True)

# Enable attention slicing on hosts with less than 64 GiB of system RAM
# (total memory as reported by psutil).
if psutil.virtual_memory().total < 64 * 1024**3:
    pipe.enable_attention_slicing()

# TORCH_COMPILE arrives as a raw environment string, so a plain truthiness
# test would also switch compilation on for values such as "0" or "False".
# Treat the usual "off" spellings (and unset/empty) as disabled; every other
# non-empty value still enables compilation, as before.
compile_requested = bool(TORCH_COMPILE) and str(TORCH_COMPILE).strip().lower() not in {
    "",
    "0",
    "false",
    "no",
    "off",
}
if compile_requested:
    pipe.unet = torch.compile(pipe.unet, mode="reduce-overhead", fullgraph=True)
    pipe.vae = torch.compile(pipe.vae, mode="reduce-overhead", fullgraph=True)
    # One throw-away single-step call so compilation happens at start-up
    # rather than on the first user request.
    pipe(prompt="warmup", num_inference_steps=1, guidance_scale=8.0)

# Load the LCM LoRA and fuse it into the base weights.
# NOTE(review): the LoRA is loaded after torch.compile and the warm-up call;
# the diffusers docs suggest fusing LoRA weights before compiling — confirm
# this ordering is intended.
pipe.load_lora_weights("latent-consistency/lcm-lora-sdv1-5")
pipe.fuse_lora()
def predict(prompt, negative_prompt, guidance, steps, seed=1231231):
    """Generate one 512x512 image for the given prompt.

    Args:
        prompt: Text prompt passed to the pipeline.
        negative_prompt: Negative text prompt passed to the pipeline.
        guidance: Value forwarded as ``guidance_scale``.
        steps: Value forwarded as ``num_inference_steps``.
        seed: Seed handed to ``torch.manual_seed``; the generator it returns
            is passed to the pipeline.

    Returns:
        The first image produced by the pipeline, or a blank 512x512 RGB
        image (after raising a Gradio warning) when the pipeline output
        flags the first image as NSFW.
    """
    rng = torch.manual_seed(seed)

    started_at = time.time()
    output = pipe(
        prompt=prompt,
        negative_prompt=negative_prompt,
        guidance_scale=guidance,
        num_inference_steps=steps,
        generator=rng,
        height=512,
        width=512,
        output_type="pil",
    )
    elapsed = time.time() - started_at
    print(f"Pipe took {elapsed} seconds")

    # The NSFW field is only consulted when the pipeline output contains it.
    flagged = False
    if "nsfw_content_detected" in output:
        flagged = output.nsfw_content_detected[0]

    if not flagged:
        return output.images[0]

    gr.Warning("NSFW content detected.")
    return Image.new("RGB", (512, 512))
# Custom stylesheet passed to gr.Blocks: horizontally centres the element with
# id "container" and caps its width at 40rem, and centres the text of the
# element with id "intro".
css = """
#container{
margin: 0 auto;
max-width: 40rem;
}
#intro{
max-width: 100%;
text-align: center;
margin: 0 auto;
}
"""
# Gradio front end: two text boxes, a button, the output image and a few
# sliders. Every input event re-runs `predict` and refreshes the image.
# NOTE(review): the nesting below is reconstructed from syntax; in particular
# the three sliders are assumed to all sit inside the accordion — confirm.
with gr.Blocks(css=css) as demo:
    with gr.Column(elem_id="container"):
        # The Markdown text is kept flush-left so the string value is unchanged.
        gr.Markdown(
            """# SD1.5 Latent Consistency LoRAs
SD1.5 is loaded with a LCM-LoRA, giving it the super power of doing inference in as little as 4 steps. [Learn more on our blog](#) or [technical report](#).
""",
            elem_id="intro",
        )

        prompt = gr.Textbox(
            lines=4,
            container=False,
            placeholder="Insert your prompt here:",
        )
        negative_prompt = gr.Textbox(
            lines=4,
            container=False,
            placeholder="Insert your negative prompt here:",
        )
        generate_bt = gr.Button("Generate", scale=1)
        image = gr.Image(type="filepath")

        with gr.Accordion("Advanced options", open=True):
            guidance = gr.Slider(
                label="Guidance",
                value=1.5,
                minimum=0.0,
                maximum=5,
                step=0.001,
            )
            steps = gr.Slider(label="Steps", value=8, minimum=1, maximum=50, step=1)
            seed = gr.Slider(
                label="Seed",
                randomize=True,
                minimum=0,
                maximum=12013012031030,
                step=1,
            )

        inputs = [prompt, negative_prompt, guidance, steps, seed]

        # Same listeners, registered in the same order: the button click,
        # typing in either text box, and any slider change.
        listeners = (
            generate_bt.click,
            prompt.input,
            negative_prompt.input,
            guidance.change,
            steps.change,
            seed.change,
        )
        for register in listeners:
            register(fn=predict, inputs=inputs, outputs=image, show_progress=False)

demo.queue()
demo.launch()