Spaces:

el-el-san
/

t2i_demo

Running on Zero

App Files Files Community

el-el-san commited on Jun 12, 2024

Commit

83901f1

verified ·

1 Parent(s): 9f960f1

Update app.py

Browse files

Files changed (1) hide show

app.py +117 -18

app.py CHANGED Viewed

@@ -1,39 +1,136 @@
 import gradio as gr
 import numpy as np
 import random
 #from diffusers import DiffusionPipeline
-from diffusers import StableDiffusionXLPipeline
 import torch
 import spaces
-MAX_SEED = np.iinfo(np.int32).max
-MAX_IMAGE_SIZE = 1216
-#pipe = DiffusionPipeline.from_pretrained("stabilityai/sdxl-turbo", torch_dtype=torch.float16, variant="fp16", use_safetensors=True)
-pipe = StableDiffusionXLPipeline.from_pretrained(
     #"yodayo-ai/kivotos-xl-2.0",
     "yodayo-ai/holodayo-xl-2.1",
-    torch_dtype=torch.float16,
-    use_safetensors=True,
-    custom_pipeline="lpw_stable_diffusion_xl",
-    add_watermarker=False,
-    variant="fp16"
 )
-pipe.to('cuda')
 prompt = "1girl, solo, upper body, v, smile, looking at viewer, outdoors, night, masterpiece, best quality, very aesthetic, absurdres"
 negative_prompt = "nsfw, (low quality, worst quality:1.2), very displeasing, 3d, watermark, signature, ugly, poorly drawn"
 @spaces.GPU
-def infer(prompt, negative_prompt, seed, randomize_seed, width, height, guidance_scale, num_inference_steps):
     if randomize_seed:
         seed = random.randint(0, MAX_SEED)
     generator = torch.Generator().manual_seed(seed)
-    image = pipe(
         prompt = prompt+", masterpiece, best quality, very aesthetic, absurdres",
         negative_prompt = negative_prompt,
         guidance_scale = guidance_scale,
@@ -43,7 +140,7 @@ def infer(prompt, negative_prompt, seed, randomize_seed, width, height, guidance
         generator = generator
     ).images[0]
-    return image
 css="""
 #col-container {
@@ -71,7 +168,8 @@ with gr.Blocks(css=css) as demo:
             )
             run_button = gr.Button("Run", scale=0)
         result = gr.Image(label="Result", show_label=False)
         with gr.Accordion("Advanced Settings", open=False):
@@ -80,7 +178,8 @@ with gr.Blocks(css=css) as demo:
                 label="Negative prompt",
                 max_lines=1,
                 placeholder="Enter a negative prompt",
-                visible=False,
             )
             seed = gr.Slider(
@@ -131,7 +230,7 @@ with gr.Blocks(css=css) as demo:
     run_button.click(
         fn = infer,
-        inputs = [prompt, negative_prompt, seed, randomize_seed, width, height, guidance_scale, num_inference_steps],
         outputs = [result]
     )

 import gradio as gr
 import numpy as np
+import PIL.Image
 import random
 #from diffusers import DiffusionPipeline
+#from diffusers import StableDiffusionXLPipeline
+from diffusers import ControlNetModel, StableDiffusionXLControlNetPipeline, AutoencoderKL
+from diffusers import DDIMScheduler, EulerAncestralDiscreteScheduler
+from controlnet_aux import PidiNetDetector, HEDdetector
+from diffusers.utils import load_image
+import cv2
 import torch
 import spaces
+def nms(x, t, s):
+    x = cv2.GaussianBlur(x.astype(np.float32), (0, 0), s)
+    f1 = np.array([[0, 0, 0], [1, 1, 1], [0, 0, 0]], dtype=np.uint8)
+    f2 = np.array([[0, 1, 0], [0, 1, 0], [0, 1, 0]], dtype=np.uint8)
+    f3 = np.array([[1, 0, 0], [0, 1, 0], [0, 0, 1]], dtype=np.uint8)
+    f4 = np.array([[0, 0, 1], [0, 1, 0], [1, 0, 0]], dtype=np.uint8)
+    y = np.zeros_like(x)
+    for f in [f1, f2, f3, f4]:
+        np.putmask(y, cv2.dilate(x, kernel=f) == x, x)
+    z = np.zeros_like(y, dtype=np.uint8)
+    z[y > t] = 255
+    return z
+def HWC3(x):
+    assert x.dtype == np.uint8
+    if x.ndim == 2:
+        x = x[:, :, None]
+    assert x.ndim == 3
+    H, W, C = x.shape
+    assert C == 1 or C == 3 or C == 4
+    if C == 3:
+        return x
+    if C == 1:
+        return np.concatenate([x, x, x], axis=2)
+    if C == 4:
+        color = x[:, :, 0:3].astype(np.float32)
+        alpha = x[:, :, 3:4].astype(np.float32) / 255.0
+        y = color * alpha + 255.0 * (1.0 - alpha)
+        y = y.clip(0, 255).astype(np.uint8)
+        return y
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+# eulera_scheduler = EulerAncestralDiscreteScheduler.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0", subfolder="scheduler")
+controlnet = ControlNetModel.from_pretrained(
+    #"xinsir/controlnet-scribble-sdxl-1.0",
+    "2vXpSwA7/test_controlnet2/CN-anytest_v4-marged_am_dim256.safetensors"
+    torch_dtype=torch.float16
+)
+vae = AutoencoderKL.from_pretrained("madebyollin/sdxl-vae-fp16-fix", torch_dtype=torch.float16)
+pipe = StableDiffusionXLControlNetPipeline.from_pretrained(
+    #"sd-community/sdxl-flash",
     #"yodayo-ai/kivotos-xl-2.0",
     "yodayo-ai/holodayo-xl-2.1",
+    controlnet=controlnet,
+    vae=vae,
+    torch_dtype=torch.float16,
+    # scheduler=eulera_scheduler,
 )
+pipe.scheduler = EulerAncestralDiscreteScheduler.from_config(pipe.scheduler.config)
+pipe.to(device)
+MAX_SEED = np.iinfo(np.int32).max
+MAX_IMAGE_SIZE = 1216
+#pipe = StableDiffusionXLPipeline.from_pretrained(
+#    #"yodayo-ai/kivotos-xl-2.0",
+#    "yodayo-ai/holodayo-xl-2.1",
+#    torch_dtype=torch.float16,
+#    use_safetensors=True,
+#    custom_pipeline="lpw_stable_diffusion_xl",
+#    add_watermarker=False,
+#    variant="fp16"
+#)
+#pipe.to('cuda')
 prompt = "1girl, solo, upper body, v, smile, looking at viewer, outdoors, night, masterpiece, best quality, very aesthetic, absurdres"
 negative_prompt = "nsfw, (low quality, worst quality:1.2), very displeasing, 3d, watermark, signature, ugly, poorly drawn"
+def nms(x, t, s):
+    x = cv2.GaussianBlur(x.astype(np.float32), (0, 0), s)
+    f1 = np.array([[0, 0, 0], [1, 1, 1], [0, 0, 0]], dtype=np.uint8)
+    f2 = np.array([[0, 1, 0], [0, 1, 0], [0, 1, 0]], dtype=np.uint8)
+    f3 = np.array([[1, 0, 0], [0, 1, 0], [0, 0, 1]], dtype=np.uint8)
+    f4 = np.array([[0, 0, 1], [0, 1, 0], [1, 0, 0]], dtype=np.uint8)
+    y = np.zeros_like(x)
+    for f in [f1, f2, f3, f4]:
+        np.putmask(y, cv2.dilate(x, kernel=f) == x, x)
+    z = np.zeros_like(y, dtype=np.uint8)
+    z[y > t] = 255
+    return z
 @spaces.GPU
+def infer(image: PIL.Image.Image,prompt, negative_prompt, seed, randomize_seed, width, height, guidance_scale, num_inference_steps)-> PIL.Image.Image:
     if randomize_seed:
         seed = random.randint(0, MAX_SEED)
+    controlnet_img = processor(image, scribble=False)
+    # following is some processing to simulate human sketch draw, different threshold can generate different width of lines
+    controlnet_img = np.array(controlnet_img)
+    controlnet_img = nms(controlnet_img, 127, 3)
+    controlnet_img = cv2.GaussianBlur(controlnet_img, (0, 0), 3)
+    # higher threshold, thiner line
+    random_val = int(round(random.uniform(0.01, 0.10), 2) * 255)
+    controlnet_img[controlnet_img > random_val] = 255
+    controlnet_img[controlnet_img < 255] = 0
+    image = Image.fromarray(controlnet_img)
     generator = torch.Generator().manual_seed(seed)
+    output_image = pipe(
         prompt = prompt+", masterpiece, best quality, very aesthetic, absurdres",
         negative_prompt = negative_prompt,
         guidance_scale = guidance_scale,
         generator = generator
     ).images[0]
+    return output_image
 css="""
 #col-container {
             )
             run_button = gr.Button("Run", scale=0)
+        image = gr.ImageEditor(type="pil", image_mode="L", crop_size=(512, 512))
         result = gr.Image(label="Result", show_label=False)
         with gr.Accordion("Advanced Settings", open=False):
                 label="Negative prompt",
                 max_lines=1,
                 placeholder="Enter a negative prompt",
+                #visible=False,
+                value="nsfw, (low quality, worst quality:1.2), very displeasing, 3d, watermark, signature, ugly, poorly drawn"
             )
             seed = gr.Slider(
     run_button.click(
         fn = infer,
+        inputs = [image,prompt, negative_prompt, seed, randomize_seed, width, height, guidance_scale, num_inference_steps],
         outputs = [result]
     )