"""Gradio/Spaces helpers: SDXL + Canny ControlNet image generation.

Loads the SDXL base pipeline with a Canny ControlNet at import time and
exposes ``generate_image``, which turns a text prompt plus a control image
into a generated picture with its background removed.
"""

# NOTE(review): `spaces` is deliberately kept as the first import, as in the
# original file -- presumably it must be loaded before torch initialises CUDA
# on ZeroGPU hardware; confirm before re-sorting the imports.
import spaces
import rembg
import torch
from diffusers import StableDiffusionXLControlNetPipeline, ControlNetModel, AutoencoderKL
import cv2
import numpy as np
from PIL import Image
import gradio as gr

# Style hints appended to every user prompt before it is sent to the pipeline.
PROMPT_SUFFIX = (
    "no background, side view, minimalist shot, single shoe, no legs, "
    "product photo"
)


def check_prompt(prompt):
    """Raise ``gr.Error`` when no usable prompt was supplied.

    Args:
        prompt: The text entered by the user, or ``None``.

    Raises:
        gr.Error: If ``prompt`` is ``None``, empty, or only whitespace.
    """
    # An untouched text field may arrive as "" rather than None, so both the
    # missing and the blank case have to be rejected.
    if prompt is None or not str(prompt).strip():
        raise gr.Error("Please enter a prompt!")


controlnet = ControlNetModel.from_pretrained(
    "diffusers/controlnet-canny-sdxl-1.0",
    torch_dtype=torch.float16,
    use_safetensors=True,
)
vae = AutoencoderKL.from_pretrained(
    "madebyollin/sdxl-vae-fp16-fix",
    torch_dtype=torch.float16,
    use_safetensors=True,
)
pipe = StableDiffusionXLControlNetPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",
    controlnet=controlnet,
    vae=vae,
    torch_dtype=torch.float16,
    use_safetensors=True,
)
pipe.to("cuda")


def get_canny(image, low_threshold=100, high_threshold=200):
    """Return a 3-channel Canny edge map of ``image`` as a PIL image.

    Args:
        image: Anything ``np.array`` can turn into an array accepted by
            ``cv2.Canny`` (e.g. a PIL image).
        low_threshold: Lower hysteresis threshold passed to ``cv2.Canny``.
        high_threshold: Upper hysteresis threshold passed to ``cv2.Canny``.

    Returns:
        A ``PIL.Image.Image`` whose three channels all hold the edge map.
    """
    pixels = np.array(image)
    edges = cv2.Canny(pixels, low_threshold, high_threshold)
    # cv2.Canny yields a single-channel map; replicate it so the result has
    # three identical channels.
    edges = edges[:, :, None]
    edges_rgb = np.concatenate([edges, edges, edges], axis=2)
    return Image.fromarray(edges_rgb)


@spaces.GPU
def generate_image(prompt, negative_prompt, control_image, scale=0.5):
    """Generate an image from text, guided by the edges of a control image.

    Args:
        prompt: User text prompt; the fixed style suffix is appended to it.
        negative_prompt: Negative prompt forwarded unchanged to the pipeline.
        control_image: Image whose Canny edges condition the ControlNet.
        scale: ControlNet conditioning scale.

    Returns:
        The first generated image after ``rembg.remove`` has been applied.

    Raises:
        gr.Error: If the prompt is missing/blank or no control image is given.
    """
    check_prompt(prompt)
    if control_image is None:
        # Without this guard the failure surfaces as an obscure OpenCV error.
        raise gr.Error("Please provide a control image!")

    # Join with an explicit separator: plain concatenation glued the suffix
    # onto the user's last word ("red sneakerno background, ...").
    full_prompt = f"{str(prompt).strip()}, {PROMPT_SUFFIX}"

    canny_image = get_canny(control_image)
    generated = pipe(
        full_prompt,
        negative_prompt=negative_prompt,
        image=canny_image,
        controlnet_conditioning_scale=scale,
    ).images[0]
    return rembg.remove(generated)