File size: 1,772 Bytes
a1f69bb |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 |
import spaces
import rembg
import torch
from diffusers import StableDiffusionXLControlNetPipeline, ControlNetModel, AutoencoderKL
import cv2
import numpy as np
from PIL import Image
import gradio as gr
# pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16, use_safetensors=True, variant="fp16")
# pipe.to("cuda")
def check_prompt(prompt):
    """Validate that a usable text prompt was supplied.

    Args:
        prompt: Prompt text to validate; may be ``None`` or a blank string.

    Raises:
        gr.Error: If ``prompt`` is ``None``, empty, or whitespace-only.
    """
    # A blank prompt is just as unusable as a missing one, so reject empty
    # and whitespace-only strings in addition to None.
    if prompt is None or not str(prompt).strip():
        raise gr.Error("Please enter a prompt!")
# Every checkpoint below is loaded in half precision from safetensors files,
# so the shared loader options live in one place.
_FP16_SAFETENSORS = {"torch_dtype": torch.float16, "use_safetensors": True}

# ControlNet checkpoint that conditions SDXL on Canny edge maps.
controlnet = ControlNetModel.from_pretrained(
    "diffusers/controlnet-canny-sdxl-1.0", **_FP16_SAFETENSORS
)

# Separate VAE checkpoint passed to the pipeline instead of its bundled one.
vae = AutoencoderKL.from_pretrained(
    "madebyollin/sdxl-vae-fp16-fix", **_FP16_SAFETENSORS
)

# Assemble the SDXL + ControlNet pipeline around the two components above.
pipe = StableDiffusionXLControlNetPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",
    controlnet=controlnet,
    vae=vae,
    **_FP16_SAFETENSORS,
)

# Runs once when the module is imported.
pipe.to("cuda")
# Generate a background-removed image from a text prompt and a control image.
@spaces.GPU
def generate_image(prompt, negative_prompt, control_image, scale=0.5):
    """Run the ControlNet pipeline on a prompt and strip the background.

    Args:
        prompt: User-supplied description. A fixed product-shot suffix is
            appended to it; ``None`` or a blank string yields the suffix alone.
        negative_prompt: Forwarded unchanged to the pipeline as
            ``negative_prompt``.
        control_image: Image whose Canny edge map (via ``get_canny``) is
            passed to the pipeline as the conditioning ``image``.
        scale: Forwarded as ``controlnet_conditioning_scale``.

    Returns:
        The first generated image after processing with ``rembg.remove``.
    """
    style_suffix = "no background, side view, minimalist shot, single shoe, no legs, product photo"

    # Join with ", " so the user's last word is not fused with the first word
    # of the suffix (e.g. "red shoe" + "no background" -> "red shoeno ...").
    user_prompt = (prompt or "").strip()
    full_prompt = f"{user_prompt}, {style_suffix}" if user_prompt else style_suffix

    canny_image = get_canny(control_image)
    generated = pipe(
        full_prompt,
        negative_prompt=negative_prompt,
        image=canny_image,
        controlnet_conditioning_scale=scale,
    ).images[0]
    return rembg.remove(generated)
def get_canny(image, low_threshold=100, high_threshold=200):
    """Build a 3-channel Canny edge image from the given image.

    Args:
        image: A PIL image or anything ``np.array`` can turn into an image
            array accepted by ``cv2.Canny``.
        low_threshold: Lower hysteresis threshold passed to ``cv2.Canny``.
        high_threshold: Upper hysteresis threshold passed to ``cv2.Canny``.

    Returns:
        A PIL image in which the single-channel edge map is replicated
        across three channels.
    """
    # NOTE(review): cv2.Canny is documented to take 8-bit input, so PIL modes
    # such as palette ("P"), alpha ("RGBA") or high-bit-depth images are
    # normalized to RGB first. "L" and "RGB" inputs are passed through as-is.
    if isinstance(image, Image.Image) and image.mode not in ("L", "RGB"):
        image = image.convert("RGB")

    edges = cv2.Canny(np.array(image), low_threshold, high_threshold)

    # The edge map is 2-D; add a channel axis and copy it three times so the
    # result is an (H, W, 3) array that converts to a 3-channel PIL image.
    edges_rgb = np.repeat(edges[:, :, None], 3, axis=2)
    return Image.fromarray(edges_rgb)
|