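# PowerPaint v2 (BrushNet) inpainting / outpainting demo served through a Gradio interface.
# The script downloads the PowerPaint v2 weights, assembles the pipeline, and exposes
# text-guided inpainting and image-outpainting through custom_infer().
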
import os
import random

import gradio as gr
import numpy as np
import torch
from PIL import Image, ImageFilter
from safetensors.torch import load_model
from transformers import CLIPTextModel

from diffusers import UniPCMultistepScheduler
from model.BrushNet_CA import BrushNetModel
from model.diffusers_c.models import UNet2DConditionModel
from pipeline.pipeline_PowerPaint_Brushnet_CA import StableDiffusionPowerPaintBrushNetPipeline
from utils.utils import TokenizerWrapper, add_tokens
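
# Fetch the PowerPaint v2 checkpoints from OpenXLab via git-lfs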
base_path = "./PowerPaint_v2"
os.system("apt install git")
os.system("apt install git-lfs")
os.system(f"git lfs clone https://code.openxlab.org.cn/zhuangjunhao/PowerPaint_v2.git {base_path}")
os.system(f"cd {base_path} && git lfs pull")
torch.set_grad_enabled(False)

context_prompt = ""
context_negative_prompt = ""
base_model_path = "./PowerPaint_v2/realisticVisionV60B1_v51VAE/"
dtype = torch.float16
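
# Load the Stable Diffusion v1.5 UNet and text encoder from which the BrushNet branch is derived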
unet = UNet2DConditionModel.from_pretrained(
    "runwayml/stable-diffusion-v1-5", subfolder="unet", revision=None, torch_dtype=dtype
)
text_encoder_brushnet = CLIPTextModel.from_pretrained(
    "runwayml/stable-diffusion-v1-5", subfolder="text_encoder", revision=None, torch_dtype=dtype
)
brushnet = BrushNetModel.from_unet(unet)
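
# Assemble the PowerPaint pipeline on top of the RealisticVision v6.0 base model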
pipe = StableDiffusionPowerPaintBrushNetPipeline.from_pretrained(
    base_model_path,
    brushnet=brushnet,
    text_encoder_brushnet=text_encoder_brushnet,
    torch_dtype=dtype,
    low_cpu_mem_usage=False,
    safety_checker=None,
)
pipe.unet = UNet2DConditionModel.from_pretrained(base_model_path, subfolder="unet", revision=None, torch_dtype=dtype)
pipe.tokenizer = TokenizerWrapper(from_pretrained=base_model_path, subfolder="tokenizer", revision=None)
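
# Register the learnable PowerPaint task tokens (10 embedding vectors each)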
add_tokens(
    tokenizer=pipe.tokenizer,
    text_encoder=pipe.text_encoder_brushnet,
    placeholder_tokens=["P_ctxt", "P_shape", "P_obj"],
    initialize_tokens=["a", "a", "a"],
    num_vectors_per_token=10,
)
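
# Load the trained PowerPaint BrushNet and text-encoder weights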
load_model(pipe.brushnet, "./PowerPaint_v2/PowerPaint_Brushnet/diffusion_pytorch_model.safetensors")
pipe.text_encoder_brushnet.load_state_dict(
    torch.load("./PowerPaint_v2/PowerPaint_Brushnet/pytorch_model.bin"), strict=False
)
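
# Use the UniPC sampler and offload idle sub-models to the CPU to reduce GPU memory use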
pipe.scheduler = UniPCMultistepScheduler.from_config(pipe.scheduler.config)
pipe.enable_model_cpu_offload()
current_control = "canny"  # not used in this demo
# controlnet_conditioning_scale = 0.8
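
# Seed every RNG (PyTorch CPU/CUDA, NumPy, random) so results are reproducible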
def set_seed(seed):
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    np.random.seed(seed)
    random.seed(seed)
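
# Map each task to the PowerPaint placeholder tokens used as positive/negative prompts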
def add_task(control_type):
    if control_type == "object-removal":
        promptA = "P_ctxt"
        promptB = "P_ctxt"
        negative_promptA = "P_obj"
        negative_promptB = "P_obj"
    elif control_type == "context-aware":
        promptA = "P_ctxt"
        promptB = "P_ctxt"
        negative_promptA = ""
        negative_promptB = ""
    elif control_type == "shape-guided":
        promptA = "P_shape"
        promptB = "P_ctxt"
        negative_promptA = "P_shape"
        negative_promptB = "P_ctxt"
    elif control_type == "image-outpainting":
        promptA = "P_ctxt"
        promptB = "P_ctxt"
        negative_promptA = "P_obj"
        negative_promptB = "P_obj"
    else:  # text-guided inpainting
        promptA = "P_obj"
        promptB = "P_obj"
        negative_promptA = "P_obj"
        negative_promptB = "P_obj"
    return promptA, promptB, negative_promptA, negative_promptB
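
# Run one PowerPaint generation: prepares the image/mask, calls the pipeline, and returns
# the generated image plus a mask/overlay visualization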
def predict(
    input_image,
    prompt,
    fitting_degree,
    ddim_steps,
    scale,
    seed,
    negative_prompt,
    task,
    left_expansion_ratio,
    right_expansion_ratio,
    top_expansion_ratio,
    bottom_expansion_ratio,
):
    # Work at a fixed 1024x1024 resolution regardless of the input size
    input_image["image"] = input_image["image"].convert("RGB").resize((1024, 1024), Image.LANCZOS)
    # Steer the prompt toward background content for context-aware / removal / outpainting tasks
    if task == "image-outpainting" or task == "context-aware":
        prompt = prompt + " empty scene"
    if task == "object-removal":
        prompt = prompt + " empty scene blur"
    # Image outpainting: pad the canvas by the requested ratios and build a mask that covers
    # the padded border (with a small blurry_gap overlap into the original image)
    if (
        left_expansion_ratio is not None and right_expansion_ratio is not None
        and top_expansion_ratio is not None and bottom_expansion_ratio is not None
    ):
        o_W, o_H = input_image["image"].convert("RGB").size
        c_W = int((1 + left_expansion_ratio + right_expansion_ratio) * o_W)
        c_H = int((1 + top_expansion_ratio + bottom_expansion_ratio) * o_H)
        expand_img = np.ones((c_H, c_W, 3), dtype=np.uint8) * 127
        original_img = np.array(input_image["image"])
        expand_img[
            int(top_expansion_ratio * o_H):int(top_expansion_ratio * o_H) + o_H,
            int(left_expansion_ratio * o_W):int(left_expansion_ratio * o_W) + o_W,
            :
        ] = original_img
        blurry_gap = 10
        expand_mask = np.ones((c_H, c_W, 3), dtype=np.uint8) * 255
        expand_mask[
            int(top_expansion_ratio * o_H) + blurry_gap:int(top_expansion_ratio * o_H) + o_H - blurry_gap,
            int(left_expansion_ratio * o_W) + blurry_gap:int(left_expansion_ratio * o_W) + o_W - blurry_gap,
            :
        ] = 0
        input_image["image"] = Image.fromarray(expand_img)
        input_image["mask"] = Image.fromarray(expand_mask)
    promptA, promptB, negative_promptA, negative_promptB = add_task(task)

    # Snap the working size to a multiple of 8; note that W holds the height and H the width,
    # matching the width=H / height=W arguments passed to the pipeline below
    img = np.array(input_image["image"].convert("RGB"))
    W = int(np.shape(img)[0] - np.shape(img)[0] % 8)
    H = int(np.shape(img)[1] - np.shape(img)[1] % 8)
    input_image["image"] = input_image["image"].resize((H, W), Image.LANCZOS)
    input_image["mask"] = input_image["mask"].resize((H, W), Image.LANCZOS)

    # Zero out the masked region of the conditioning image
    np_inpimg = np.array(input_image["image"])
    np_inmask = np.array(input_image["mask"]) / 255.0
    if len(np_inmask.shape) == 2:
        np_inmask = np.expand_dims(np_inmask, axis=-1)
    np_inpimg = np_inpimg * (1 - np_inmask)
    input_image["image"] = Image.fromarray(np_inpimg.astype(np.uint8)).convert("RGB")
    set_seed(seed)
    result = pipe(
        promptA=promptA,
        promptB=promptB,
        promptU=prompt,
        tradoff=fitting_degree,
        tradoff_nag=fitting_degree,
        image=input_image["image"].convert("RGB"),
        mask=input_image["mask"].convert("RGB"),
        num_inference_steps=ddim_steps,
        generator=torch.Generator("cuda").manual_seed(seed),
        brushnet_conditioning_scale=1.0,
        negative_promptA=negative_promptA,
        negative_promptB=negative_promptB,
        negative_promptU=negative_prompt,
        guidance_scale=scale,
        width=H,
        height=W,
    ).images[0]
    # Build a visualization of the result with the masked region tinted red
    mask_np = np.array(input_image["mask"].convert("RGB"))
    red = np.array(result).astype("float")
    red[:, :, 0] = 180.0
    red[:, :, 1] = 0
    red[:, :, 2] = 0
    result_m = np.array(result)
    result_m = Image.fromarray(
        (
            result_m.astype("float") * (1 - mask_np.astype("float") / 512.0) + mask_np.astype("float") / 512.0 * red
        ).astype("uint8")
    )

    # Blend the generated content back onto the masked input using a blurred (feathered) mask;
    # result_paste is computed here but not included in the returned outputs
    m_img = input_image["mask"].convert("RGB").filter(ImageFilter.GaussianBlur(radius=3))
    m_img = np.asarray(m_img) / 255.0
    img_np = np.asarray(input_image["image"].convert("RGB")) / 255.0
    ours_np = np.asarray(result) / 255.0
    ours_np = ours_np * m_img + (1 - m_img) * img_np
    result_paste = Image.fromarray(np.uint8(ours_np * 255))

    dict_res = [input_image["mask"].convert("RGB"), result_m]
    dict_out = [result]
    return dict_out, dict_res
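
# Convenience wrapper used by the Gradio UI: loads the image (and mask, for text-guided
# inpainting) from disk and forwards everything to predict()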
def custom_infer(input_image_path,
                 input_mask_path=None,
                 prompt="",
                 fitting_degree=0.5,
                 ddim_steps=20,
                 scale=5,
                 seed=143,
                 negative_prompt="",
                 task="text-guided",
                 left_expansion_ratio=0.2,
                 right_expansion_ratio=0.2,
                 top_expansion_ratio=0.2,
                 bottom_expansion_ratio=0.2):
    image = Image.open(input_image_path)
    if task == "text-guided":
        # Text-guided inpainting needs a user-supplied mask
        mask = Image.open(input_mask_path)
        input_dict = {"image": image, "mask": mask}
        a, b = predict(input_dict, prompt, fitting_degree, ddim_steps, scale, seed, negative_prompt, task,
                       None, None, None, None)
    elif task == "image-outpainting":
        # Outpainting builds its own mask from the expansion ratios
        input_dict = {"image": image}
        a, b = predict(input_dict, prompt, fitting_degree, ddim_steps, scale, seed, negative_prompt, task,
                       left_expansion_ratio, right_expansion_ratio, top_expansion_ratio, bottom_expansion_ratio)
    else:
        raise ValueError(f"Unsupported task: {task}")
    return a[0]
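
# Example (hypothetical paths): run a single outpainting job without the UI
# out = custom_infer("./examples/input.png", task="image-outpainting",
#                    prompt="A beautiful landscape", seed=143)
# out.save("./outpainted.png")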
# Define the Gradio interface
inputs = [
    gr.Image(label="Input Image", type="filepath"),
    gr.Image(label="Input Mask (optional)", type="filepath"),
    gr.Textbox(label="Prompt", value="A beautiful landscape"),
    gr.Slider(label="Fitting Degree", minimum=0, maximum=1, value=0.5, step=0.05),  # 0-1 tradeoff passed to the pipeline
    gr.Slider(label="DDIM Steps", minimum=10, maximum=50, value=20, step=1),
    gr.Slider(label="Scale", minimum=1, maximum=20, value=7.5, step=0.1),
    gr.Slider(label="Seed", minimum=0, maximum=1300000, value=143, step=1),
    gr.Textbox(label="Negative Prompt", value="blur, low quality"),
    gr.Radio(label="Task", choices=["text-guided", "image-outpainting"], value="image-outpainting"),
    gr.Slider(label="Left Expansion Ratio", minimum=0, maximum=2, value=0.2, step=0.01),
    gr.Slider(label="Right Expansion Ratio", minimum=0, maximum=2, value=0.2, step=0.01),
    gr.Slider(label="Top Expansion Ratio", minimum=0, maximum=2, value=0.2, step=0.01),
    gr.Slider(label="Bottom Expansion Ratio", minimum=0, maximum=2, value=0.2, step=0.01)
]
outputs = [
    gr.Image(label="Output Image")
]

# Create the Gradio interface
demo = gr.Interface(fn=custom_infer, inputs=inputs, outputs=outputs, title="Inference")
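
# NOTE: concurrency_count is a Gradio 3.x queue() argument; on Gradio 4.x you would likely
# need default_concurrency_limit instead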
demo.queue(concurrency_count=1, max_size=1, api_open=True)
demo.launch(show_api=True)