# t2i_demo / app.py
# el-el-san's picture
# Update app.py
# 191ea97 verified
# raw
# history blame
# 5.19 kB
import gradio as gr
import numpy as np
from PIL import Image
import random
from diffusers import ControlNetModel, StableDiffusionXLControlNetPipeline, AutoencoderKL
from diffusers import DDIMScheduler, EulerAncestralDiscreteScheduler
import cv2
import torch
import spaces
def nms(x, t, s):
    """Thin the strokes of an image down to binary ridge lines.

    The input is smoothed with a Gaussian of sigma ``s``; a pixel is kept when
    it equals the maximum of its 3x3 neighbourhood along at least one of four
    line directions (horizontal, vertical, both diagonals). Kept pixels whose
    smoothed value exceeds ``t`` become 255, everything else becomes 0.

    Args:
        x: Input image as a NumPy array.
        t: Threshold applied to the smoothed value of the kept pixels.
        s: Sigma of the Gaussian blur (kernel size is derived from it).

    Returns:
        A ``uint8`` array of the same shape as ``x`` containing only 0 and 255.
    """
    blurred = cv2.GaussianBlur(x.astype(np.float32), (0, 0), s)

    # One 3x3 line-shaped structuring element per direction.
    directions = (
        np.array([[0, 0, 0], [1, 1, 1], [0, 0, 0]], dtype=np.uint8),  # horizontal
        np.array([[0, 1, 0], [0, 1, 0], [0, 1, 0]], dtype=np.uint8),  # vertical
        np.array([[1, 0, 0], [0, 1, 0], [0, 0, 1]], dtype=np.uint8),  # diagonal "\"
        np.array([[0, 0, 1], [0, 1, 0], [1, 0, 0]], dtype=np.uint8),  # diagonal "/"
    )

    ridges = np.zeros_like(blurred)
    for kernel in directions:
        # Dilation yields the directional maximum; equality marks the pixels
        # that already are that maximum.
        is_peak = cv2.dilate(blurred, kernel=kernel) == blurred
        ridges[is_peak] = blurred[is_peak]

    return np.where(ridges > t, 255, 0).astype(np.uint8)
# Upper bounds used by the seed and width/height sliders of the UI.
MAX_IMAGE_SIZE = 1216
MAX_SEED = np.iinfo(np.int32).max

# Prefer CUDA when it is available, otherwise stay on the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# All model components are loaded in half precision.
vae = AutoencoderKL.from_pretrained(
    "madebyollin/sdxl-vae-fp16-fix",
    torch_dtype=torch.float16,
)
controlnet = ControlNetModel.from_pretrained(
    "xinsir/controlnet-scribble-sdxl-1.0",
    torch_dtype=torch.float16,
)

# SDXL base checkpoint combined with the scribble ControlNet and the VAE above.
pipe = StableDiffusionXLControlNetPipeline.from_pretrained(
    "yodayo-ai/holodayo-xl-2.1",
    torch_dtype=torch.float16,
    vae=vae,
    controlnet=controlnet,
)

# Replace the checkpoint's scheduler with Euler Ancestral, reusing its config.
pipe.scheduler = EulerAncestralDiscreteScheduler.from_config(pipe.scheduler.config)
pipe.to(device)
@spaces.GPU
def infer(image, prompt, negative_prompt, seed, randomize_seed, width, height, guidance_scale, num_inference_steps) -> Image.Image:
    """Generate an image from a hand-drawn sketch and a text prompt.

    The sketch is rescaled (keeping its aspect ratio) to an area of roughly
    1024x1024 pixels, reduced to a binary line map, and handed to the
    ControlNet pipeline as the conditioning image.

    Args:
        image: Value of the ``gr.ImageEditor`` input. Either a PIL image or
            the editor's dict value, in which case its ``"composite"`` entry
            is used.
        prompt: Positive prompt; fixed quality tags are appended to it.
        negative_prompt: Negative prompt forwarded to the pipeline.
        seed: Seed for the random generator (ignored when ``randomize_seed``).
        randomize_seed: When true, draw a fresh seed in ``[0, MAX_SEED]``.
        width: Width slider value.
        height: Height slider value.
            NOTE(review): ``width``/``height`` are accepted for interface
            compatibility but are not used; the output size is derived from
            the sketch. Confirm whether the sliders should take precedence.
        guidance_scale: Classifier-free guidance scale.
        num_inference_steps: Number of denoising steps.

    Returns:
        The first image produced by the pipeline.

    Raises:
        gr.Error: If no sketch was supplied.
    """
    # gr.ImageEditor delivers a dict (background / layers / composite) rather
    # than a bare image; plain PIL images are still accepted.
    sketch = image.get("composite") if isinstance(image, dict) else image
    if sketch is None:
        raise gr.Error("Please draw or upload a sketch before running.")

    # Rescale so that the pixel count is about 1024 * 1024.
    src_width, src_height = sketch.size
    ratio = np.sqrt(1024. * 1024. / (src_width * src_height))
    new_width, new_height = int(src_width * ratio), int(src_height * ratio)
    sketch = sketch.resize((new_width, new_height))

    if randomize_seed:
        seed = random.randint(0, MAX_SEED)

    # Turn the sketch into a black/white line map: thin the strokes, blur
    # them again, then binarise at a random low threshold (1%-10% of full
    # scale) so the stroke thickness varies between runs.
    controlnet_img = nms(np.array(sketch), 127, 3)
    controlnet_img = cv2.GaussianBlur(controlnet_img, (0, 0), 3)
    random_val = int(round(random.uniform(0.01, 0.10), 2) * 255)
    controlnet_img[controlnet_img > random_val] = 255
    controlnet_img[controlnet_img < 255] = 0
    control_image = Image.fromarray(controlnet_img)

    # Sliders may deliver floats; the generator and step count need ints.
    generator = torch.Generator().manual_seed(int(seed))

    output_image = pipe(
        prompt=prompt + ", masterpiece, best quality, very aesthetic, absurdres",
        negative_prompt=negative_prompt,
        # The conditioning image must be passed explicitly, otherwise the
        # ControlNet pipeline has nothing to condition on.
        image=control_image,
        guidance_scale=guidance_scale,
        num_inference_steps=int(num_inference_steps),
        # Generate at the rescaled size so it matches the control image.
        width=new_width,
        height=new_height,
        generator=generator,
    ).images[0]
    return output_image
# Centre the main column and cap its width.
css = """
#col-container {
margin: 0 auto;
max-width: 520px;
}
"""

with gr.Blocks(css=css) as demo:

    with gr.Column(elem_id="col-container"):
        gr.Markdown(value="""
# Text-to-Image Demo
using [Holodayo XL 2.1](https://huggingface.co/yodayo-ai/holodayo-xl-2.1)
""")

        # Prompt box and run button share one row.
        with gr.Row():
            prompt = gr.Text(
                label="Prompt",
                placeholder="Enter your prompt",
                max_lines=1,
                show_label=False,
                container=False,
            )
            run_button = gr.Button(value="Run", scale=0)

        # Sketch input (greyscale) followed by the generated result.
        image = gr.ImageEditor(type="pil", image_mode="L", crop_size=(512, 512))
        result = gr.Image(label="Result", show_label=False)

        with gr.Accordion(label="Advanced Settings", open=False):
            negative_prompt = gr.Text(
                label="Negative prompt",
                placeholder="Enter a negative prompt",
                max_lines=1,
                value="nsfw, (low quality, worst quality:1.2), very displeasing, 3d, watermark, signature, ugly, poorly drawn"
            )

            seed = gr.Slider(
                label="Seed",
                value=0,
                minimum=0,
                maximum=MAX_SEED,
                step=1,
            )
            randomize_seed = gr.Checkbox(label="Randomize seed", value=True)

            # Output size controls.
            with gr.Row():
                width = gr.Slider(
                    label="Width",
                    value=832,
                    minimum=256,
                    maximum=MAX_IMAGE_SIZE,
                    step=32,
                )
                height = gr.Slider(
                    label="Height",
                    value=1216,
                    minimum=256,
                    maximum=MAX_IMAGE_SIZE,
                    step=32,
                )

            # Sampling controls.
            with gr.Row():
                guidance_scale = gr.Slider(
                    label="Guidance scale",
                    value=7,
                    minimum=0.0,
                    maximum=20.0,
                    step=0.1,
                )
                num_inference_steps = gr.Slider(
                    label="Number of inference steps",
                    value=28,
                    minimum=1,
                    maximum=28,
                    step=1,
                )

    # The input order must match the positional parameters of `infer`.
    run_button.click(
        fn=infer,
        inputs=[
            image,
            prompt,
            negative_prompt,
            seed,
            randomize_seed,
            width,
            height,
            guidance_scale,
            num_inference_steps,
        ],
        outputs=[result],
    )

# Enable request queuing, then start the app.
demo.queue()
demo.launch()