AnyV2V / black_box_image_edit /cosxl_edit.py
vinesmsuic's picture
update
118a337
from huggingface_hub import hf_hub_download
import torch
import PIL
class CosXLEdit():
"""
Edit Cos Stable Diffusion XL 1.0 Base is tuned to use a Cosine-Continuous EDM VPred schedule, and then upgraded to perform instructed image editing.
Reference: https://huggingface.co/stabilityai/cosxl
"""
def __init__(self, device="cuda"):
"""
Attributes:
pipe (CosStableDiffusionXLInstructPix2PixPipeline): The InstructPix2Pix pipeline for image transformation.
Args:
device (str, optional): Device on which the pipeline runs. Defaults to "cuda".
"""
from diffusers import EDMEulerScheduler
from .cosxl.custom_pipeline import CosStableDiffusionXLInstructPix2PixPipeline
from .cosxl.utils import set_timesteps_patched
EDMEulerScheduler.set_timesteps = set_timesteps_patched
try:
edit_file = hf_hub_download(repo_id="TIGER-Lab/cosxl", filename="cosxl_edit.safetensors")
self.pipe = CosStableDiffusionXLInstructPix2PixPipeline.from_single_file(
edit_file, num_in_channels=8
)
except:
edit_file_path = "./black_box_image_edit/cosxl/cosxl_edit.safetensors"
self.pipe = CosStableDiffusionXLInstructPix2PixPipeline.from_single_file(
edit_file_path, num_in_channels=8
)
self.pipe.scheduler = EDMEulerScheduler(sigma_min=0.002, sigma_max=120.0, sigma_data=1.0, prediction_type="v_prediction")
self.pipe.to(device)
self.pipe.enable_vae_tiling()
self.pipe.enable_model_cpu_offload()
def infer_one_image(self, src_image: PIL.Image.Image = None, src_prompt: str = None, target_prompt: str = None, instruct_prompt: str = None, seed: int = 42, negative_prompt=""):
"""
Modifies the source image based on the provided instruction prompt.
Args:
src_image (PIL.Image.Image): Source image in RGB format.
instruct_prompt (str): Caption for editing the image.
seed (int, optional): Seed for random generator. Defaults to 42.
Returns:
PIL.Image.Image: The transformed image.
"""
src_image = src_image.convert('RGB') # force it to RGB format
generator = torch.manual_seed(seed)
resolution = 1024
preprocessed_image = src_image.resize((resolution, resolution))
image = self.pipe(prompt=instruct_prompt,
image=preprocessed_image,
height=resolution,
width=resolution,
negative_prompt=negative_prompt,
guidance_scale=7,
num_inference_steps=20,
generator=generator).images[0]
image = image.resize((src_image.width, src_image.height))
return image