from diffusers import StableDiffusionControlNetPipeline, ControlNetModel
from diffusers import UniPCMultistepScheduler
import gradio as gr
import torch
import base64
from io import BytesIO
from PIL import Image, ImageFilter

# The <pose-maker> custom element is registered by mannequinAll.js, loaded below.
canvas_html = "<pose-maker/>"

load_js = """
async () => {
    const url = "https://huggingface.co/datasets/mishig/gradio-components/raw/main/mannequinAll.js"
    fetch(url)
        .then(res => res.text())
        .then(text => {
            const script = document.createElement('script');
            script.type = "module";
            script.src = URL.createObjectURL(new Blob([text], { type: 'application/javascript' }));
            document.head.appendChild(script);
        });
}
"""

get_js_image = """
async (canvas, prompt) => {
    const poseMakerEl = document.querySelector("pose-maker");
    const imgBase64 = poseMakerEl.captureScreenshot();
    return [imgBase64, prompt];
}
"""

# Models
controlnet = ControlNetModel.from_pretrained(
    "lllyasviel/sd-controlnet-depth", torch_dtype=torch.float16
)
pipe = StableDiffusionControlNetPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5",
    controlnet=controlnet,
    safety_checker=None,
    torch_dtype=torch.float16,
)
pipe.scheduler = UniPCMultistepScheduler.from_config(pipe.scheduler.config)

# This call loads the individual model components onto the GPU on demand, so we
# don't need to explicitly call pipe.to("cuda").
pipe.enable_model_cpu_offload()

# xformers memory-efficient attention
pipe.enable_xformers_memory_efficient_attention()

# Generator seed
generator = torch.manual_seed(0)


def generate_images(canvas, prompt):
    try:
        # The canvas arrives as a "data:image/png;base64,<payload>" data URL;
        # strip the header and decode the payload.
        base64_img = canvas
        image_data = base64.b64decode(base64_img.split(",")[1])
        input_img = Image.open(BytesIO(image_data)).convert("RGB").resize((512, 512))
        # Soften the rendered mannequin so it reads more like a depth map.
        input_img = input_img.filter(ImageFilter.GaussianBlur(radius=5))
        output = pipe(
            prompt,
            input_img,
            generator=generator,
            num_images_per_prompt=2,
            num_inference_steps=20,
        )
        return list(output.images)
    except Exception as e:
        raise gr.Error(str(e))


def placeholder_fn(axis):
    pass


js_change_rotation_axis = """
async (axis) => {
    const poseMakerEl = document.querySelector("pose-maker");
    poseMakerEl.changeRotationAxis(axis);
}
"""

js_pose_template = """
async (pose) => {
    const poseMakerEl = document.querySelector("pose-maker");
    poseMakerEl.setPose(pose);
}
"""

with gr.Blocks() as blocks:
    gr.HTML(
        """
        <div style="text-align: center; margin: 0 auto;">
            <h1>
                Pose in 3D & Render with ControlNet (SD-1.5)
            </h1>
            <p>
                Using ControlNet and three.js/mannequin.js
            </p>
            <p>
                For faster inference without waiting in queue, you may duplicate
                the space and upgrade to GPU in settings. Duplicate Space
            </p>
        </div>
""" ) with gr.Row(): with gr.Column(): canvas = gr.HTML(canvas_html, elem_id="canvas_html", visible=True) with gr.Row(): rotation_axis = gr.Radio(["x", "y", "z"], value="x", label="Joint rotation axis") pose_template = gr.Radio(["regular", "ballet", "handstand", "split", "kick", "chilling"], value="regular", label="Pose template") prompt = gr.Textbox( label="Enter your prompt", max_lines=1, placeholder="best quality, extremely detailed", ) run_button = gr.Button("Generate") with gr.Column(): gallery = gr.Gallery().style(grid=[2], height="auto") rotation_axis.change(fn=placeholder_fn, inputs=[rotation_axis], outputs=[], queue=False, _js=js_change_rotation_axis) pose_template.change(fn=placeholder_fn, inputs=[pose_template], outputs=[], queue=False, _js=js_pose_template) run_button.click(fn=generate_images, inputs=[canvas, prompt], outputs=[gallery], _js=get_js_image) blocks.load(None, None, None, _js=load_js) blocks.launch(debug=True)