Spaces:

el-el-san
/

t2i_demo

Running on Zero

App Files Files Community

t2i_demo / app.py

el-el-san

Update app.py

191ea97 verified 7 months ago

raw

history blame

5.19 kB

	import gradio as gr
	import numpy as np
	from PIL import Image
	import random
	from diffusers import ControlNetModel, StableDiffusionXLControlNetPipeline, AutoencoderKL
	from diffusers import DDIMScheduler, EulerAncestralDiscreteScheduler
	import cv2
	import torch
	import spaces

	def nms(x, t, s):
	    """Thin a blurred response map to directional local maxima and binarize it.

	    The input is cast to float32 and Gaussian-blurred with sigma ``s``.  A
	    pixel survives when it equals the maximum of its 3-pixel line
	    neighbourhood along at least one of four orientations (horizontal,
	    vertical, and the two diagonals).  Surviving values strictly greater
	    than ``t`` become 255; everything else becomes 0.

	    Args:
	        x: Array to process; anything accepted by ``cv2.GaussianBlur``
	            after ``astype(np.float32)``.
	        t: Threshold applied to the surviving (blurred) values.
	        s: Sigma handed to ``cv2.GaussianBlur``.

	    Returns:
	        A ``np.uint8`` array with the blurred input's shape, holding only
	        the values 0 and 255.
	    """
	    blurred = cv2.GaussianBlur(x.astype(np.float32), (0, 0), s)

	    # One 3x3 line-shaped structuring element per orientation.
	    line_kernels = tuple(
	        np.array(rows, dtype=np.uint8)
	        for rows in (
	            [[0, 0, 0], [1, 1, 1], [0, 0, 0]],  # horizontal
	            [[0, 1, 0], [0, 1, 0], [0, 1, 0]],  # vertical
	            [[1, 0, 0], [0, 1, 0], [0, 0, 1]],  # main diagonal
	            [[0, 0, 1], [0, 1, 0], [1, 0, 0]],  # anti-diagonal
	        )
	    )

	    peaks = np.zeros_like(blurred)
	    for kernel in line_kernels:
	        # Dilation yields the neighbourhood maximum; equality marks a peak.
	        is_line_max = cv2.dilate(blurred, kernel=kernel) == blurred
	        np.putmask(peaks, is_line_max, blurred)

	    return np.where(peaks > t, 255, 0).astype(np.uint8)

	# Use CUDA when torch reports it as available, otherwise fall back to the CPU.
	device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

	# ControlNet weights, loaded in half precision (repo name indicates a
	# scribble-conditioned SDXL model).
	controlnet = ControlNetModel.from_pretrained(
	"xinsir/controlnet-scribble-sdxl-1.0",
	torch_dtype=torch.float16
	)

	# Separate VAE checkpoint, loaded in half precision and passed to the
	# pipeline below in place of the base model's own VAE.
	vae = AutoencoderKL.from_pretrained("madebyollin/sdxl-vae-fp16-fix", torch_dtype=torch.float16)

	# SDXL + ControlNet pipeline built from the base checkpoint and the two
	# components loaded above.
	pipe = StableDiffusionXLControlNetPipeline.from_pretrained(
	"yodayo-ai/holodayo-xl-2.1",
	controlnet=controlnet,
	vae=vae,
	torch_dtype=torch.float16,
	)

	# Replace the pipeline's scheduler with Euler Ancestral, reusing the
	# configuration of the scheduler it was loaded with.
	pipe.scheduler = EulerAncestralDiscreteScheduler.from_config(pipe.scheduler.config)
	pipe.to(device)

	# Largest int32 value: upper bound for the seed slider and for random seeds.
	MAX_SEED = np.iinfo(np.int32).max
	# Upper bound of the width/height sliders in the UI.
	MAX_IMAGE_SIZE = 1216

	@spaces.GPU
	def infer(
	    image: "Image.Image | dict | None",
	    prompt,
	    negative_prompt,
	    seed,
	    randomize_seed,
	    width,
	    height,
	    guidance_scale,
	    num_inference_steps,
	) -> "Image.Image":
	    """Generate an image from a sketch and a text prompt.

	    The sketch is rescaled to roughly one megapixel, reduced to thin binary
	    strokes, and passed to the ControlNet pipeline as the conditioning image.

	    Args:
	        image: The sketch, either a PIL image or the dict produced by
	            ``gr.ImageEditor`` (the flattened drawing is read from its
	            ``"composite"`` entry).
	        prompt: Positive prompt; fixed quality tags are appended to it.
	        negative_prompt: Negative prompt forwarded unchanged.
	        seed: Seed for the torch generator (ignored when ``randomize_seed``).
	        randomize_seed: When truthy, draw a fresh seed in ``[0, MAX_SEED]``.
	        width: Accepted for interface compatibility; currently unused.
	        height: Accepted for interface compatibility; currently unused.
	        guidance_scale: Forwarded to the pipeline.
	        num_inference_steps: Forwarded to the pipeline as an int.

	    Returns:
	        The first image produced by the pipeline.

	    Raises:
	        gr.Error: If no sketch was supplied.
	    """
	    # gr.ImageEditor delivers a dict rather than a bare image; the merged
	    # background + layers picture lives under "composite".
	    if isinstance(image, dict):
	        image = image.get("composite")
	    if image is None:
	        raise gr.Error("Please draw or upload a sketch first.")

	    # NOTE(review): the output size is derived from the sketch so its aspect
	    # ratio is preserved; the `width`/`height` arguments are not consulted
	    # (the original overwrote them here as well). Confirm this is intended.
	    src_width, src_height = image.size
	    ratio = np.sqrt(1024. * 1024. / (src_width * src_height))
	    # The pipeline rejects sizes that are not multiples of 8, so snap down.
	    new_width = max(8, int(src_width * ratio) // 8 * 8)
	    new_height = max(8, int(src_height * ratio) // 8 * 8)
	    image = image.resize((new_width, new_height))

	    if randomize_seed:
	        seed = random.randint(0, MAX_SEED)

	    # Thin the strokes, soften them again, then re-binarize with a small
	    # random threshold so the stroke thickness varies from run to run.
	    controlnet_img = np.array(image)
	    controlnet_img = nms(controlnet_img, 127, 3)
	    controlnet_img = cv2.GaussianBlur(controlnet_img, (0, 0), 3)

	    random_val = int(round(random.uniform(0.01, 0.10), 2) * 255)
	    controlnet_img[controlnet_img > random_val] = 255
	    controlnet_img[controlnet_img < 255] = 0
	    control_image = Image.fromarray(controlnet_img).convert("RGB")

	    # Sliders may hand over floats; manual_seed and the step count need ints.
	    generator = torch.Generator().manual_seed(int(seed))

	    output_image = pipe(
	        prompt=prompt + ", masterpiece, best quality, very aesthetic, absurdres",
	        negative_prompt=negative_prompt,
	        # The conditioning image is mandatory for the ControlNet pipeline.
	        image=control_image,
	        guidance_scale=guidance_scale,
	        num_inference_steps=int(num_inference_steps),
	        width=new_width,
	        height=new_height,
	        generator=generator,
	    ).images[0]

	    return output_image

	# Stylesheet handed to gr.Blocks: centres the element with id
	# "col-container" and caps its width at 520px.
	css = """
	#col-container {
	margin: 0 auto;
	max-width: 520px;
	}
	"""

	# NOTE(review): block nesting below was reconstructed from a listing whose
	# indentation had been stripped -- confirm it matches the intended layout.
	with gr.Blocks(css=css) as demo:

	    with gr.Column(elem_id="col-container"):
	        # Page heading.
	        gr.Markdown("""
	# Text-to-Image Demo
	using [Holodayo XL 2.1](https://huggingface.co/yodayo-ai/holodayo-xl-2.1)
	""")

	        # Prompt box with the run button beside it.
	        with gr.Row():
	            prompt = gr.Text(
	                label="Prompt", show_label=False, max_lines=1,
	                placeholder="Enter your prompt", container=False,
	            )
	            run_button = gr.Button("Run", scale=0)

	        # Sketch input (grayscale PIL) and the generated result.
	        image = gr.ImageEditor(type="pil", image_mode="L", crop_size=(512, 512))
	        result = gr.Image(label="Result", show_label=False)

	        with gr.Accordion("Advanced Settings", open=False):
	            negative_prompt = gr.Text(
	                label="Negative prompt",
	                max_lines=1,
	                placeholder="Enter a negative prompt",
	                value="nsfw, (low quality, worst quality:1.2), very displeasing, 3d, watermark, signature, ugly, poorly drawn",
	            )

	            seed = gr.Slider(label="Seed", minimum=0, maximum=MAX_SEED, step=1, value=0)
	            randomize_seed = gr.Checkbox(label="Randomize seed", value=True)

	            # Output size controls.
	            with gr.Row():
	                width = gr.Slider(
	                    label="Width", minimum=256, maximum=MAX_IMAGE_SIZE, step=32, value=832,
	                )
	                height = gr.Slider(
	                    label="Height", minimum=256, maximum=MAX_IMAGE_SIZE, step=32, value=1216,
	                )

	            # Sampler controls.
	            with gr.Row():
	                guidance_scale = gr.Slider(
	                    label="Guidance scale", minimum=0.0, maximum=20.0, step=0.1, value=7,
	                )
	                num_inference_steps = gr.Slider(
	                    label="Number of inference steps", minimum=1, maximum=28, step=1, value=28,
	                )

	    # Input order must match the positional parameters of `infer`.
	    run_button.click(
	        fn=infer,
	        inputs=[
	            image,
	            prompt,
	            negative_prompt,
	            seed,
	            randomize_seed,
	            width,
	            height,
	            guidance_scale,
	            num_inference_steps,
	        ],
	        outputs=[result],
	    )

	# Enable request queuing, then start the server.
	demo.queue()
	demo.launch()