ShoeGenv2

Runtime error

File size: 1,772 Bytes

a1f69bb

import spaces
import rembg
import torch
from diffusers import StableDiffusionXLControlNetPipeline, ControlNetModel, AutoencoderKL
import cv2
import numpy as np
from PIL import Image
import gradio as gr

# pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16, use_safetensors=True, variant="fp16")
# pipe.to("cuda")

def check_prompt(prompt):
    """Validate that the user supplied a usable text prompt.

    Args:
        prompt: Prompt text coming from the UI; may be ``None`` or a string.

    Raises:
        gr.Error: If ``prompt`` is ``None``, empty, or whitespace only.
    """
    # A cleared text field usually arrives as "" rather than None, so testing
    # only for None would let blank prompts slip through to the model.
    if prompt is None or not str(prompt).strip():
        raise gr.Error("Please enter a prompt!")

# Keyword arguments shared by every checkpoint load below: float16 weights
# read from safetensors files.
_FP16_LOAD_KWARGS = {"torch_dtype": torch.float16, "use_safetensors": True}

# ControlNet checkpoint that conditions generation on a Canny edge image.
controlnet = ControlNetModel.from_pretrained(
    "diffusers/controlnet-canny-sdxl-1.0", **_FP16_LOAD_KWARGS
)
# Separate VAE checkpoint, passed to the pipeline in place of its default one.
vae = AutoencoderKL.from_pretrained(
    "madebyollin/sdxl-vae-fp16-fix", **_FP16_LOAD_KWARGS
)
# SDXL base pipeline assembled from the ControlNet and VAE loaded above.
pipe = StableDiffusionXLControlNetPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",
    controlnet=controlnet,
    vae=vae,
    **_FP16_LOAD_KWARGS,
)

# Place the whole pipeline on the CUDA device once, at import time.
pipe.to("cuda")

# Generate a background-free image from a text prompt and a control image
@spaces.GPU
def generate_image(prompt, negative_prompt, control_image, scale=0.5):
    """Run the ControlNet pipeline on a prompt and strip the background.

    Args:
        prompt: User text describing the desired image.
        negative_prompt: Text forwarded to the pipeline as ``negative_prompt``.
        control_image: Image whose Canny edges condition the generation.
        scale: Value forwarded as ``controlnet_conditioning_scale``.

    Returns:
        The result of ``rembg.remove`` applied to the first generated image.

    Raises:
        gr.Error: If ``check_prompt`` rejects the prompt or no control image
            was supplied.
    """
    check_prompt(prompt)
    if control_image is None:
        # Fail with a readable UI message instead of an opaque OpenCV error.
        raise gr.Error("Please provide a control image!")

    style_suffix = (
        "no background, side view, minimalist shot, single shoe, no legs, "
        "product photo"
    )
    # Join with ", " so the last word of the user's prompt is not fused with
    # the first word of the suffix (e.g. "red sneakerno background").
    full_prompt = ", ".join(
        part for part in (str(prompt).strip(), style_suffix) if part
    )

    canny_image = get_canny(control_image)

    generated = pipe(
        full_prompt,
        negative_prompt=negative_prompt,
        image=canny_image,
        controlnet_conditioning_scale=scale,
    ).images[0]

    return rembg.remove(generated)

def get_canny(image):
    """Build a three-channel Canny edge image from ``image``.

    Args:
        image: Input convertible with ``np.array`` into something
            ``cv2.Canny`` accepts.

    Returns:
        A PIL image whose three channels all hold the same edge map.
    """
    lower, upper = 100, 200  # thresholds passed to cv2.Canny

    edges = cv2.Canny(np.array(image), lower, upper)
    # The edge map is 2-D; replicate it along a new last axis to get H x W x 3.
    stacked = np.stack((edges, edges, edges), axis=2)
    return Image.fromarray(stacked)