controlnet-3d-pose

Runtime error

File size: 5,089 Bytes

import base64
import os
from io import BytesIO

import gradio as gr
import torch
from diffusers import StableDiffusionControlNetPipeline, ControlNetModel
from diffusers import UniPCMultistepScheduler
from PIL import Image, ImageFilter

# Markup for the custom <pose-maker> element; it is rendered through the
# gr.HTML component created in the Blocks layout.
canvas_html = '<pose-maker/>'
# JavaScript passed to blocks.load. It downloads mannequinAll.js as text,
# wraps that text in a Blob URL and appends it to <head> as a module script.
# NOTE(review): presumably that script is what registers the <pose-maker>
# custom element — confirm against the hosted file.
load_js = """
async () => {
  const url = "https://huggingface.co/datasets/mishig/gradio-components/raw/main/mannequinAll.js"
  fetch(url)
    .then(res => res.text())
    .then(text => {
      const script = document.createElement('script');
      script.type = "module"
      script.src = URL.createObjectURL(new Blob([text], { type: 'application/javascript' }));
      document.head.appendChild(script);
    });
}
"""

# JavaScript attached to the Generate button click. It ignores the incoming
# canvas value, asks the <pose-maker> element for a screenshot via
# captureScreenshot(), and returns [screenshot, prompt].
# NOTE(review): presumably Gradio feeds that returned pair to generate_images
# as its (canvas, prompt) arguments — confirm for the installed Gradio version.
get_js_image = """
async (canvas, prompt) => {
  const poseMakerEl = document.querySelector("pose-maker");
  const imgBase64 = poseMakerEl.captureScreenshot();
  return [imgBase64, prompt]
}
"""

# Models
# Load the "sd-controlnet-depth" ControlNet weights in half precision.
controlnet = ControlNetModel.from_pretrained(
    "lllyasviel/sd-controlnet-depth", torch_dtype=torch.float16
)
# Build the Stable Diffusion v1.5 pipeline around that ControlNet, also in
# half precision. The safety checker is explicitly disabled here.
pipe = StableDiffusionControlNetPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5", controlnet=controlnet, safety_checker=None, torch_dtype=torch.float16
)
# Replace the pipeline's scheduler with UniPC, built from the config of the
# scheduler the pipeline was loaded with.
pipe.scheduler = UniPCMultistepScheduler.from_config(pipe.scheduler.config)

# This command loads the individual model components on GPU on-demand. So, we don't
# need to explicitly call pipe.to("cuda").
pipe.enable_model_cpu_offload()

# Enable xformers memory-efficient attention on the pipeline.
# NOTE(review): this call is unguarded; presumably it raises at import time
# when xformers is not installed or no CUDA device is present — confirm, and
# consider making it best-effort.
pipe.enable_xformers_memory_efficient_attention()

# Seed torch once at import time and keep the returned generator. The same
# generator object is passed to every pipe(...) call in generate_images.
# NOTE(review): because it is shared, its state presumably advances between
# requests, so repeated identical prompts need not give identical images — confirm.
generator = torch.manual_seed(0)


def generate_images(canvas, prompt):
    """Render images from a pose screenshot and a text prompt.

    The screenshot is decoded, converted to RGB, resized to 512x512 and
    Gaussian-blurred (radius 5) before being handed to the ControlNet
    pipeline together with the prompt.

    Args:
        canvas: Screenshot of the posed mannequin as a base64 data URL, i.e.
            a string whose base64 payload follows the first comma.
        prompt: Text prompt forwarded to the pipeline.

    Returns:
        A list of the images produced by the pipeline (two images are
        requested per call, using 20 inference steps).

    Raises:
        gr.Error: If no usable screenshot was supplied, or if decoding or
            inference fails. The underlying exception is chained as the
            cause so the server-side traceback stays complete.
    """
    # Fail early with a readable message instead of the AttributeError /
    # IndexError text the user would otherwise see in the UI.
    if not isinstance(canvas, str) or not canvas:
        raise gr.Error("No pose screenshot was received from the 3D canvas.")
    try:
        # Keep only the payload that follows the "<header>," part of the data URL.
        _header, separator, encoded = canvas.partition(',')
        if not separator:
            raise ValueError("Pose screenshot is not a base64 data URL.")
        image_data = base64.b64decode(encoded)
        input_img = Image.open(BytesIO(image_data)).convert(
            'RGB').resize((512, 512))
        # NOTE(review): the blur presumably softens the render so it looks
        # more like a depth map for the depth ControlNet — confirm intent.
        input_img = input_img.filter(ImageFilter.GaussianBlur(radius=5))
        output = pipe(
            prompt,
            input_img,
            generator=generator,
            num_images_per_prompt=2,
            num_inference_steps=20,
        )
        return list(output.images)
    except Exception as e:
        # Surface any failure in the Gradio UI while preserving the cause.
        raise gr.Error(str(e)) from e

def placeholder_fn(axis):
    """Do nothing; serve as the Python callback for JS-driven events.

    The radio-button change events register this function as ``fn`` while
    their behaviour is supplied by the accompanying ``_js`` snippet. The
    argument is accepted and ignored.

    Args:
        axis: Value of the component that triggered the event (unused).

    Returns:
        None.
    """
    return None

# JavaScript attached to the "Joint rotation axis" radio change event: it
# forwards the selected value to the <pose-maker> element's
# changeRotationAxis() method.
js_change_rotation_axis = """
async (axis) => {
  const poseMakerEl = document.querySelector("pose-maker");
  poseMakerEl.changeRotationAxis(axis);
}
"""

# JavaScript attached to the "Pose template" radio change event: it forwards
# the selected template name to the <pose-maker> element's setPose() method.
js_pose_template = """
async (pose) => {
  const poseMakerEl = document.querySelector("pose-maker");
  poseMakerEl.setPose(pose);
}
"""

# Hugging Face Spaces exposes the "<owner>/<space-name>" identifier of the
# running Space through the SPACE_ID environment variable. It is absent when
# the script runs elsewhere (e.g. locally).
SPACE_ID = os.environ.get("SPACE_ID")

# The "Duplicate Space" badge needs the Space id inside its link. Previously
# the header was a plain string, so the link pointed at the literal text
# "{SPACE_ID}". The id is now interpolated, and the badge is omitted when the
# id is unknown.
duplicate_notice = (
    f"""<p>For faster inference without waiting in queue, you may duplicate the space and upgrade to GPU in settings. <a href="https://huggingface.co/spaces/{SPACE_ID}?duplicate=true"><img style="display: inline; margin-top: 0em; margin-bottom: 0em" src="https://bit.ly/3gLdBN6" alt="Duplicate Space" /></a></p>"""
    if SPACE_ID
    else ""
)

with gr.Blocks() as blocks:
    # Page header: title, credits and (on Spaces) the duplicate badge.
    gr.HTML(
        f"""
            <div style="text-align: center; margin: 0 auto;">
              <div
                style="
                  display: inline-flex;
                  align-items: center;
                  gap: 0.8rem;
                  font-size: 1.75rem;
                "
              >
                <h1 style="font-weight: 900; margin-bottom: 7px;margin-top:5px">
                  Pose in 3D & Render with ControlNet (SD-1.5)
                </h1>
              </div>
              <p style="margin-bottom: 10px; font-size: 94%; line-height: 23px;">
                Using <a href="https://github.com/lllyasviel/ControlNet">ControlNet</a> and <a href="https://boytchev.github.io/mannequin.js/">three.js/mannequin.js</a>
              </p>
              {duplicate_notice}
            </div>
        """
    )
    with gr.Row():
        # Left column: 3D pose editor, its controls, the prompt and the button.
        with gr.Column():
            canvas = gr.HTML(canvas_html, elem_id="canvas_html", visible=True)
            with gr.Row():
                rotation_axis = gr.Radio(["x", "y", "z"], value="x", label="Joint rotation axis")
                pose_template = gr.Radio(["regular", "ballet", "handstand", "split", "kick", "chilling"], value="regular", label="Pose template")
            prompt = gr.Textbox(
                label="Enter your prompt",
                max_lines=1,
                placeholder="best quality, extremely detailed",
            )
            run_button = gr.Button("Generate")
        # Right column: generated images.
        with gr.Column():
            # NOTE(review): `.style(...)` and the `_js` keyword used below are
            # presumably tied to the Gradio 3.x API — confirm the pinned
            # gradio version before upgrading.
            gallery = gr.Gallery().style(grid=[2], height="auto")
    # Both radio groups are handled by their JavaScript snippets; the Python
    # callback is a no-op and produces no outputs.
    rotation_axis.change(fn=placeholder_fn,
                            inputs=[rotation_axis],
                            outputs=[],
                            queue=False,
                            _js=js_change_rotation_axis)
    pose_template.change(fn=placeholder_fn,
                            inputs=[pose_template],
                            outputs=[],
                            queue=False,
                            _js=js_pose_template)
    # get_js_image returns [screenshot, prompt]; generate_images fills the gallery.
    run_button.click(fn=generate_images,
                     inputs=[canvas, prompt],
                     outputs=[gallery],
                     _js=get_js_image)
    # Run the pose-maker loader script on page load.
    blocks.load(None, None, None, _js=load_js)

blocks.launch(debug=True)