import random

import gradio as gr
import numpy as np
import PIL.Image
from PIL import Image
from diffusers import ControlNetModel, StableDiffusionXLControlNetPipeline, AutoencoderKL
from diffusers import DDIMScheduler, EulerAncestralDiscreteScheduler
from diffusers.utils import load_image
import cv2
import torch
import spaces


def nms(x, t, s):
    """Thin an edge map with directional non-maximum suppression.

    The input is blurred with a Gaussian of sigma ``s``; a pixel is kept when
    it equals the dilation of the blurred map under at least one of four
    3x3 line-shaped kernels (horizontal, vertical and the two diagonals).
    Kept pixels whose value exceeds ``t`` are set to 255, all others to 0.

    Args:
        x: Array-like edge map (anything supporting ``astype``).
        t: Threshold applied to the surviving (blurred) responses.
        s: Gaussian sigma passed to ``cv2.GaussianBlur``.

    Returns:
        A ``uint8`` array with the same shape as ``x`` holding 0 or 255.
    """
    x = cv2.GaussianBlur(x.astype(np.float32), (0, 0), s)

    f1 = np.array([[0, 0, 0], [1, 1, 1], [0, 0, 0]], dtype=np.uint8)
    f2 = np.array([[0, 1, 0], [0, 1, 0], [0, 1, 0]], dtype=np.uint8)
    f3 = np.array([[1, 0, 0], [0, 1, 0], [0, 0, 1]], dtype=np.uint8)
    f4 = np.array([[0, 0, 1], [0, 1, 0], [1, 0, 0]], dtype=np.uint8)

    y = np.zeros_like(x)

    for f in [f1, f2, f3, f4]:
        # A pixel equal to its own dilation is a maximum along that kernel's line.
        np.putmask(y, cv2.dilate(x, kernel=f) == x, x)

    z = np.zeros_like(y, dtype=np.uint8)
    z[y > t] = 255
    return z


device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

controlnet = ControlNetModel.from_pretrained(
    "xinsir/controlnet-scribble-sdxl-1.0",
    torch_dtype=torch.float16
)
vae = AutoencoderKL.from_pretrained("madebyollin/sdxl-vae-fp16-fix", torch_dtype=torch.float16)

pipe = StableDiffusionXLControlNetPipeline.from_pretrained(
    "yodayo-ai/holodayo-xl-2.1",
    controlnet=controlnet,
    vae=vae,
    torch_dtype=torch.float16,
)
pipe.scheduler = EulerAncestralDiscreteScheduler.from_config(pipe.scheduler.config)
pipe.to(device)

MAX_SEED = np.iinfo(np.int32).max
MAX_IMAGE_SIZE = 1216


@spaces.GPU
def infer(image: dict, prompt, negative_prompt, seed, randomize_seed, width, height,
          guidance_scale, num_inference_steps) -> PIL.Image.Image:
    """Generate an image from the editor's scribble and a text prompt.

    Args:
        image: Value of the ``gr.ImageEditor`` component; the flattened
            drawing is read from its ``"composite"`` entry.
        prompt: Positive prompt; fixed quality tags are appended to it.
        negative_prompt: Negative prompt forwarded to the pipeline.
        seed: Seed used when ``randomize_seed`` is false.
        randomize_seed: When true, a random seed in ``[0, MAX_SEED]`` replaces ``seed``.
        width: Ignored; the output size is derived from the scribble (kept so
            the UI wiring stays unchanged).
        height: Ignored, see ``width``.
        guidance_scale: Classifier-free guidance scale forwarded to the pipeline.
        num_inference_steps: Number of denoising steps.

    Returns:
        The first image produced by the pipeline.

    Raises:
        gr.Error: If the editor holds no composite image.
    """
    scribble = image.get("composite") if image else None
    if scribble is None:
        raise gr.Error("Please draw or upload a scribble before running.")

    # Rescale the scribble so its area is about 1024x1024 pixels while keeping
    # the aspect ratio; the slider-provided width/height are overridden here.
    width, height = scribble.size
    ratio = np.sqrt(1024. * 1024. / (width * height))
    new_width, new_height = int(width * ratio), int(height * ratio)
    scribble = scribble.resize((new_width, new_height))

    if randomize_seed:
        seed = random.randint(0, MAX_SEED)

    # Slider values may arrive as floats; manual_seed and the step count need ints.
    generator = torch.Generator().manual_seed(int(seed))

    output_image = pipe(
        prompt=prompt + ", masterpiece, best quality, very aesthetic, absurdres",
        negative_prompt=negative_prompt,
        image=scribble,
        controlnet_conditioning_scale=1.0,
        guidance_scale=guidance_scale,
        num_inference_steps=int(num_inference_steps),
        width=new_width,
        height=new_height,
        generator=generator
    ).images[0]

    return output_image


css = """
#col-container {
    margin: 0 auto;
    max-width: 520px;
}
"""

with gr.Blocks(css=css) as demo:

    with gr.Column(elem_id="col-container"):
        gr.Markdown("""
        # Text-to-Image Demo using [Holodayo XL 2.1](https://huggingface.co/yodayo-ai/holodayo-xl-2.1)
        """)

        with gr.Row():
            prompt = gr.Text(
                label="Prompt",
                show_label=False,
                max_lines=1,
                placeholder="Enter your prompt",
                container=False,
            )

            run_button = gr.Button("Run", scale=0)

        # Scribble canvas; infer() reads the flattened "composite" image from it.
        image = gr.ImageEditor(type="pil", image_mode="L", crop_size=(512, 512))
        result = gr.Image(label="Result", show_label=False)

        with gr.Accordion("Advanced Settings", open=False):

            negative_prompt = gr.Text(
                label="Negative prompt",
                max_lines=1,
                placeholder="Enter a negative prompt",
                value="nsfw, (low quality, worst quality:1.2), very displeasing, 3d, watermark, signature, ugly, poorly drawn"
            )

            seed = gr.Slider(
                label="Seed",
                minimum=0,
                maximum=MAX_SEED,
                step=1,
                value=0,
            )

            randomize_seed = gr.Checkbox(label="Randomize seed", value=True)

            with gr.Row():
                width = gr.Slider(
                    label="Width",
                    minimum=256,
                    maximum=MAX_IMAGE_SIZE,
                    step=32,
                    value=1024,
                )

                height = gr.Slider(
                    label="Height",
                    minimum=256,
                    maximum=MAX_IMAGE_SIZE,
                    step=32,
                    value=1024,
                )

            with gr.Row():
                guidance_scale = gr.Slider(
                    label="Guidance scale",
                    minimum=0.0,
                    maximum=20.0,
                    step=0.1,
                    value=7,
                )

                num_inference_steps = gr.Slider(
                    label="Number of inference steps",
                    minimum=1,
                    maximum=28,
                    step=1,
                    value=28,
                )

    # Clear the previous result first, then run inference. The clearing step
    # takes no inputs, so its callback must accept no arguments, and it must
    # target a component that exists in this layout (`result`).
    run_button.click(lambda: None, inputs=None, outputs=result).then(
        fn=infer,
        inputs=[image, prompt, negative_prompt, seed, randomize_seed, width, height,
                guidance_scale, num_inference_steps],
        outputs=[result]
    )

demo.queue().launch()