# Spaces: Running on Zero
from typing import Tuple, Optional | |
import os | |
import gradio as gr | |
import numpy as np | |
import random | |
import spaces | |
import cv2 | |
from diffusers import DiffusionPipeline | |
from diffusers import FluxInpaintPipeline | |
import torch | |
from PIL import Image, ImageFilter | |
from huggingface_hub import login | |
from diffusers import AutoencoderTiny, AutoencoderKL | |
from huggingface_hub import hf_hub_download, HfFileSystem, ModelCard, snapshot_download | |
import copy | |
import random | |
import time | |
import boto3 | |
from io import BytesIO | |
from datetime import datetime | |
from diffusers.utils import load_image | |
import json | |
from preprocessor import Preprocessor | |
from diffusers.pipelines.flux.pipeline_flux_controlnet_inpaint import FluxControlNetInpaintPipeline | |
from diffusers.models.controlnet_flux import FluxControlNetModel | |
# Hugging Face Hub authentication. The token is read from the environment.
# login(token=None) would fall back to an interactive prompt, which cannot be
# answered on a headless host, so the call is skipped when no token is set.
HF_TOKEN = os.environ.get("HF_TOKEN")
if HF_TOKEN:
    login(token=HF_TOKEN)
else:
    print("HF_TOKEN is not set; skipping Hugging Face login")

# Upper bound for random seeds and the seed slider (largest signed 32-bit int).
MAX_SEED = np.iinfo(np.int32).max
# Target length, in pixels, of the longer image side fed to the pipeline.
IMAGE_SIZE = 1024

# init
dtype = torch.bfloat16
device = "cuda" if torch.cuda.is_available() else "cpu"
base_model = "black-forest-labs/FLUX.1-dev"
controlnet_model = 'InstantX/FLUX.1-dev-Controlnet-Union-alpha'

# All weights are loaded once at import time and shared by every request.
controlnet = FluxControlNetModel.from_pretrained(controlnet_model, torch_dtype=dtype)
taef1 = AutoencoderTiny.from_pretrained("madebyollin/taef1", torch_dtype=dtype).to(device)
# NOTE(review): good_vae is loaded onto the device but is not referenced
# anywhere else in the visible file -- confirm whether it is still needed.
good_vae = AutoencoderKL.from_pretrained(base_model, subfolder="vae", torch_dtype=dtype).to(device)
pipe = FluxControlNetInpaintPipeline.from_pretrained(
    base_model,
    controlnet=controlnet,
    torch_dtype=dtype,
    vae=taef1,
).to(device)
# Lookup table: control-mode name (as chosen in the UI) -> integer index that
# is forwarded to the pipeline as `control_mode`.
# Several edge/line style modes deliberately share index 0.
control_mode_ids = {
    # index 0
    "scribble_hed": 0,
    "canny": 0,
    "mlsd": 0,
    # index 1
    "tile": 1,
    # index 2
    "depth_midas": 2,
    # index 3
    "blur": 3,
    # index 4
    "openpose": 4,
    # index 5
    "gray": 5,
    # index 6
    "low_quality": 6,
}
class calculateDuration:
    """Context manager that measures and prints how long its body took.

    After the ``with`` block exits, ``start_time``, ``end_time`` and
    ``elapsed_time`` (all in seconds, from ``time.time()``) are available on
    the instance. Exceptions raised inside the block are not suppressed.
    """

    def __init__(self, activity_name=""):
        # Optional label; when empty the report omits the activity name.
        self.activity_name = activity_name

    def __enter__(self):
        self.start_time = time.time()
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        finished_at = time.time()
        self.end_time = finished_at
        self.elapsed_time = finished_at - self.start_time

        if not self.activity_name:
            print(f"Elapsed time: {self.elapsed_time:.6f} seconds")
            return

        print(f"Elapsed time for {self.activity_name}: {self.elapsed_time:.6f} seconds")
def calculate_image_dimensions_for_flux(
    original_resolution_wh: Tuple[int, int],
    maximum_dimension: int = IMAGE_SIZE
) -> Tuple[int, int]:
    """Scale a resolution so its longer side becomes ``maximum_dimension``.

    The aspect ratio is kept (up to rounding), then each side is rounded down
    to a multiple of 32. A side is never allowed to drop below 32, so very
    elongated inputs do not produce a zero-sized dimension.

    Args:
        original_resolution_wh: ``(width, height)`` of the source image.
        maximum_dimension: Target length of the longer side before snapping.
            If this is smaller than 32 the returned sides are still 32.

    Returns:
        ``(new_width, new_height)``, both positive multiples of 32.

    Raises:
        ValueError: If either source dimension is not positive.
    """
    width, height = original_resolution_wh
    if width <= 0 or height <= 0:
        raise ValueError(
            f"image dimensions must be positive, got {width}x{height}"
        )

    longest_side = max(width, height)

    # Integer arithmetic instead of multiplying by a float scale factor: a
    # float product can land just below the exact value, and truncating it
    # would lose a pixel (and then a whole 32-pixel step after snapping).
    new_width = int(width * maximum_dimension // longest_side)
    new_height = int(height * maximum_dimension // longest_side)

    # Snap down to the nearest multiple of 32.
    new_width -= new_width % 32
    new_height -= new_height % 32

    # Snapping can reach 0 for a very short side; clamp to one 32-pixel step.
    return max(new_width, 32), max(new_height, 32)
def is_mask_empty(image: Image.Image) -> bool:
    """Return True when every pixel of ``image`` is black after grayscale conversion.

    Args:
        image: The mask image to inspect.

    Returns:
        True if the grayscale version contains no non-zero pixel.
    """
    # getbbox() returns None when the image has no non-zero region, which
    # avoids materialising every pixel as a Python list.
    return image.convert("L").getbbox() is None
def process_mask(
    mask: Image.Image,
    mask_inflation: Optional[int] = None,
    mask_blur: Optional[int] = None
) -> Image.Image:
    """
    Inflates and blurs the white regions of a mask.

    Args:
        mask (Image.Image): The input mask image.
        mask_inflation (Optional[int]): Side length of the square dilation
            kernel. Note this is the kernel size, not a per-edge pixel count.
            Values that are falsy or truncate to <= 0 skip the dilation.
        mask_blur (Optional[int]): The radius of the Gaussian blur to apply.
            Falsy or non-positive values skip the blur.

    Returns:
        Image.Image: The processed mask with inflated and/or blurred regions;
        the input object itself when neither step is requested.
    """
    if mask_inflation:
        # UI sliders / API clients may deliver the value as a float; np.ones
        # needs integer dimensions.
        kernel_size = int(mask_inflation)
        # Re-check after truncation so a fractional value never produces an
        # empty kernel.
        if kernel_size > 0:
            mask_array = np.array(mask)
            kernel = np.ones((kernel_size, kernel_size), np.uint8)
            mask_array = cv2.dilate(mask_array, kernel, iterations=1)
            mask = Image.fromarray(mask_array)

    if mask_blur and mask_blur > 0:
        mask = mask.filter(ImageFilter.GaussianBlur(radius=mask_blur))

    return mask
def upload_image_to_r2(image, account_id, access_key, secret_key, bucket_name):
    """Upload ``image`` as a PNG to a Cloudflare R2 bucket and return its key.

    Args:
        image: Object with a PIL-style ``save(fileobj, format)`` method.
        account_id: R2 account id, used to build the endpoint URL.
        access_key: Access key id for the S3-compatible API.
        secret_key: Secret access key for the S3-compatible API.
        bucket_name: Destination bucket.

    Returns:
        The object key the image was stored under
        (``generated_images/<Y/m/d/HMS>_<random int>.png``).
    """
    # Credentials are deliberately kept out of the log output.
    print("upload_image_to_r2", account_id, bucket_name)
    connectionUrl = f"https://{account_id}.r2.cloudflarestorage.com"

    s3 = boto3.client(
        's3',
        endpoint_url=connectionUrl,
        region_name='auto',
        aws_access_key_id=access_key,
        aws_secret_access_key=secret_key
    )

    # Timestamped key plus a random suffix to keep keys from colliding when
    # two uploads happen within the same second.
    current_time = datetime.now().strftime("%Y/%m/%d/%H%M%S")
    image_file = f"generated_images/{current_time}_{random.randint(0, MAX_SEED)}.png"

    # Serialise in memory and rewind so the upload reads from the start.
    buffer = BytesIO()
    image.save(buffer, "PNG")
    buffer.seek(0)
    s3.upload_fileobj(buffer, bucket_name, image_file)

    print("upload finish", image_file)
    return image_file
# True while a LoRA adapter loaded by run_flux is attached to the shared
# `pipe`; lets a later request without LoRA detach it instead of reusing it.
_lora_attached = False


# NOTE(review): `spaces` is imported by this file but no function is decorated
# with `spaces.GPU`; if this app is deployed on ZeroGPU hardware, confirm
# whether this function needs that decorator.
def run_flux(
    image: Image.Image,
    mask: Image.Image,
    control_image: Image.Image,
    control_mode: int,
    prompt: str,
    lora_path: str,
    lora_weights: str,
    lora_scale: float,
    seed_slicer: int,
    randomize_seed_checkbox: bool,
    strength_slider: float,
    num_inference_steps_slider: int,
    resolution_wh: Tuple[int, int],
    progress
) -> Image.Image:
    """Run the shared inpainting pipeline once and return the first image.

    Args:
        image: Source image to inpaint.
        mask: Mask selecting the region to repaint.
        control_image: Conditioning image for the ControlNet.
        control_mode: Integer control-mode index passed to the pipeline.
        prompt: Text prompt.
        lora_path: LoRA repository/path; a LoRA is loaded only when both this
            and ``lora_weights`` are non-empty.
        lora_weights: Weight file name inside ``lora_path``.
        lora_scale: Passed as ``joint_attention_kwargs={"scale": ...}``.
        seed_slicer: Seed used when ``randomize_seed_checkbox`` is false.
        randomize_seed_checkbox: Draw a fresh random seed when true.
        strength_slider: ``strength`` argument of the pipeline.
        num_inference_steps_slider: Number of denoising steps.
        resolution_wh: ``(width, height)`` of the output.
        progress: Accepted for the caller's convenience; not used here.

    Returns:
        The first image produced by the pipeline.
    """
    global _lora_attached

    print("Running FLUX...")
    if lora_path and lora_weights:
        with calculateDuration("load lora"):
            print("start to load lora", lora_path, lora_weights)
            pipe.unload_lora_weights()
            # Flag is only raised once the new adapter has actually loaded.
            _lora_attached = False
            pipe.load_lora_weights(lora_path, weight_name=lora_weights)
            _lora_attached = True
    elif _lora_attached:
        # A previous request attached a LoRA; without this it would silently
        # keep influencing requests that asked for none.
        with calculateDuration("unload lora"):
            pipe.unload_lora_weights()
        _lora_attached = False

    width, height = resolution_wh
    if randomize_seed_checkbox:
        seed_slicer = random.randint(0, MAX_SEED)
    # The seed may arrive as a float from the UI; manual_seed needs an int.
    generator = torch.Generator().manual_seed(int(seed_slicer))

    with calculateDuration("run pipe"):
        generated_image = pipe(
            prompt=prompt,
            image=image,
            mask_image=mask,
            control_image=control_image,
            control_mode=control_mode,
            width=width,
            height=height,
            strength=strength_slider,
            generator=generator,
            num_inference_steps=num_inference_steps_slider,
            max_sequence_length=256,
            joint_attention_kwargs={"scale": lora_scale}
        ).images[0]
    return generated_image
# Control modes that need a derived control image: preprocessor model name
# plus any extra keyword arguments for the preprocessor call.
_CONTROL_PREPROCESSORS = {
    "depth_midas": ("Midas", {}),
    "openpose": ("Openpose", {"hand_and_face": True}),
    "canny": ("Canny", {}),
    "mlsd": ("MLSD", {}),
    "scribble_hed": ("HED", {}),
}


def _build_control_image(image, control_mode, image_resolution, detect_resolution=512):
    """Return the ControlNet conditioning image for ``control_mode``."""
    spec = _CONTROL_PREPROCESSORS.get(control_mode)
    if spec is None:
        # Modes without a dedicated preprocessor here (tile, blur, gray,
        # low_quality) previously left the control image undefined; use the
        # resized source image as the conditioning input instead.
        # NOTE(review): confirm whether these modes should get their own
        # preprocessing (e.g. grayscale / blur) rather than the raw image.
        return image

    model_name, extra_kwargs = spec
    preprocessor = Preprocessor()
    preprocessor.load(model_name)
    return preprocessor(
        image=image,
        image_resolution=image_resolution,
        detect_resolution=detect_resolution,
        **extra_kwargs,
    )


def process(
    image_url: str,
    mask_url: str,
    inpainting_prompt_text: str,
    mask_inflation_slider: int,
    mask_blur_slider: int,
    control_mode: str,
    seed_slicer: int,
    randomize_seed_checkbox: bool,
    strength_slider: float,
    num_inference_steps_slider: int,
    lora_path: str,
    lora_weights: str,
    lora_scale: float,
    upload_to_r2: bool,
    account_id: str,
    access_key: str,
    secret_key: str,
    bucket: str,
    progress=gr.Progress(track_tqdm=True)
):
    """Gradio handler: load image and mask, inpaint, optionally upload.

    Args:
        image_url: URL of the image to inpaint.
        mask_url: URL of the mask image.
        inpainting_prompt_text: Text prompt for the inpainted region.
        mask_inflation_slider: Dilation kernel size forwarded to ``process_mask``.
        mask_blur_slider: Blur radius forwarded to ``process_mask``.
        control_mode: Key of ``control_mode_ids`` selecting the ControlNet mode.
        seed_slicer, randomize_seed_checkbox, strength_slider,
        num_inference_steps_slider, lora_path, lora_weights, lora_scale:
            Forwarded unchanged to ``run_flux``.
        upload_to_r2: Upload the result with ``upload_image_to_r2`` when true.
        account_id, access_key, secret_key, bucket: R2 connection settings.
        progress: Gradio progress tracker, forwarded to ``run_flux``.

    Returns:
        ``(image, json_text)``. ``image`` is the generated image or ``None``
        on failure; ``json_text`` is a JSON object with ``status`` and
        ``message`` (plus ``url`` after a successful upload).
    """
    result = {"status": "false", "message": ""}

    # --- input validation -------------------------------------------------
    if not image_url:
        gr.Info("please enter image url for inpaiting")
        result["message"] = "invalid image url"
        return None, json.dumps(result)

    if not inpainting_prompt_text:
        gr.Info("Please enter inpainting text prompt.")
        result["message"] = "invalid inpainting prompt"
        return None, json.dumps(result)

    if not mask_url:
        gr.Info("Please enter mask url for inpainting.")
        result["message"] = "invalid mask url"
        return None, json.dumps(result)

    if control_mode not in control_mode_ids:
        result["message"] = "invalid control mode"
        return None, json.dumps(result)

    # --- load inputs ------------------------------------------------------
    with calculateDuration("load image"):
        try:
            image = load_image(image_url)
            mask = load_image(mask_url)
        except Exception as error:
            # Report unreachable / invalid URLs through the JSON result
            # instead of letting the exception escape the handler.
            print(f"load image failed: {error!r}")
            image = mask = None

    if image is None or mask is None:
        gr.Info("Please upload an image & mask by url.")
        result["message"] = "can not load image"
        return None, json.dumps(result)

    # generate
    with calculateDuration("resize & process mask"):
        width, height = calculate_image_dimensions_for_flux(original_resolution_wh=image.size)
        image = image.resize((width, height), Image.LANCZOS)
        mask = mask.resize((width, height), Image.LANCZOS)
        mask = process_mask(mask, mask_inflation=mask_inflation_slider, mask_blur=mask_blur_slider)

    # generated control_
    with calculateDuration("Preprocessor Image"):
        print("start to generate control image")
        try:
            control_image = _build_control_image(image, control_mode, image_resolution=width)
        except Exception as error:
            print(f"generate control image failed: {error!r}")
            result["message"] = "generate control image failed"
            return None, json.dumps(result)

    control_mode_id = control_mode_ids[control_mode]

    try:
        generated_image = run_flux(
            image=image,
            mask=mask,
            control_image=control_image,
            control_mode=control_mode_id,
            prompt=inpainting_prompt_text,
            lora_path=lora_path,
            lora_scale=lora_scale,
            lora_weights=lora_weights,
            seed_slicer=seed_slicer,
            randomize_seed_checkbox=randomize_seed_checkbox,
            strength_slider=strength_slider,
            num_inference_steps_slider=num_inference_steps_slider,
            resolution_wh=(width, height),
            progress=progress
        )
    except Exception as error:
        # Log the cause; the caller only sees the generic message below.
        print(f"generate image failed: {error!r}")
        result["message"] = "generate image failed"
        return None, json.dumps(result)

    print("run flux finish")

    if upload_to_r2:
        try:
            with calculateDuration("upload image"):
                url = upload_image_to_r2(generated_image, account_id, access_key, secret_key, bucket)
        except Exception as error:
            # Still hand back the generated image when only the upload fails.
            print(f"upload image failed: {error!r}")
            result = {"status": "false", "message": "upload image failed"}
        else:
            result = {"status": "success", "message": "upload image success", "url": url}
    else:
        result = {"status": "success", "message": "Image generated but not uploaded"}

    return generated_image, json.dumps(result)
# Gradio UI: left column holds the inputs and settings, right column shows the
# generated image and the JSON status returned by `process`.
with gr.Blocks() as demo:
    gr.Markdown("Flux inpaint with lora")
    with gr.Row():
        with gr.Column():
            image_url = gr.Text(
                label="Orginal image url",
                show_label=True,
                max_lines=1,
                placeholder="Enter image url for inpainting",
                container=False
            )
            mask_url = gr.Text(
                label="Mask image url",
                show_label=True,
                max_lines=1,
                placeholder="Enter url of masking",
                container=False,
            )
            inpainting_prompt_text_component = gr.Text(
                label="Inpainting prompt",
                show_label=True,
                max_lines=1,
                placeholder="Enter text to generate inpainting",
                container=False,
            )
            control_mode = gr.Dropdown(
                ["canny", "depth_midas", "openpose", "mlsd", "low_quality", "gray", "blur", "tile"],
                label="Controlnet Model",
                info="choose controlnet model!",
                value="canny"
            )
            submit_button_component = gr.Button(value='Submit', variant='primary', scale=0)

            with gr.Accordion("Lora Settings", open=True):
                lora_path = gr.Textbox(
                    label="Lora model path",
                    show_label=True,
                    max_lines=1,
                    placeholder="Enter your model path",
                    info="Currently, only LoRA hosted on Hugging Face'model can be loaded properly.",
                    value=""
                )
                lora_weights = gr.Textbox(
                    label="Lora weights",
                    show_label=True,
                    max_lines=1,
                    placeholder="Enter your lora weights name",
                    value=""
                )
                lora_scale = gr.Slider(
                    label="Lora scale",
                    show_label=True,
                    minimum=0,
                    maximum=1,
                    step=0.1,
                    value=0.9,
                )

            with gr.Accordion("Advanced Settings", open=False):
                with gr.Row():
                    mask_inflation_slider_component = gr.Slider(
                        label="Mask inflation",
                        info="Adjusts the amount of mask edge expansion before "
                             "inpainting.",
                        minimum=0,
                        maximum=20,
                        step=1,
                        value=5,
                    )
                    mask_blur_slider_component = gr.Slider(
                        label="Mask blur",
                        info="Controls the intensity of the Gaussian blur applied to "
                             "the mask edges.",
                        minimum=0,
                        maximum=20,
                        step=1,
                        value=5,
                    )
                seed_slicer_component = gr.Slider(
                    label="Seed",
                    minimum=0,
                    maximum=MAX_SEED,
                    step=1,
                    value=42,
                )
                randomize_seed_checkbox_component = gr.Checkbox(
                    label="Randomize seed", value=True)
                with gr.Row():
                    strength_slider_component = gr.Slider(
                        label="Strength",
                        info="Indicates extent to transform the reference `image`. "
                             "Must be between 0 and 1. `image` is used as a starting "
                             "point and more noise is added the higher the `strength`.",
                        minimum=0,
                        maximum=1,
                        step=0.01,
                        value=0.85,
                    )
                    num_inference_steps_slider_component = gr.Slider(
                        label="Number of inference steps",
                        # The original help text stopped mid-sentence.
                        info="The number of denoising steps. More denoising steps "
                             "usually lead to a higher quality image at the "
                             "expense of slower inference.",
                        minimum=1,
                        maximum=50,
                        step=1,
                        value=20,
                    )

            with gr.Accordion("R2 Settings", open=False):
                upload_to_r2 = gr.Checkbox(label="Upload to R2", value=False)
                with gr.Row():
                    account_id = gr.Textbox(label="Account Id", placeholder="Enter R2 account id")
                    bucket = gr.Textbox(label="Bucket Name", placeholder="Enter R2 bucket name here")
                with gr.Row():
                    # Credentials are masked so they are not shown in clear
                    # text on screen.
                    access_key = gr.Textbox(
                        label="Access Key",
                        placeholder="Enter R2 access key here",
                        type="password",
                    )
                    secret_key = gr.Textbox(
                        label="Secret Key",
                        placeholder="Enter R2 secret key here",
                        type="password",
                    )

        with gr.Column():
            generated_image = gr.Image(label="Result", show_label=False)
            output_json_component = gr.Code(label="JSON Result", language="json")

    # The order of `inputs` must match the positional parameters of `process`.
    submit_button_component.click(
        fn=process,
        inputs=[
            image_url,
            mask_url,
            inpainting_prompt_text_component,
            mask_inflation_slider_component,
            mask_blur_slider_component,
            control_mode,
            seed_slicer_component,
            randomize_seed_checkbox_component,
            strength_slider_component,
            num_inference_steps_slider_component,
            lora_path,
            lora_weights,
            lora_scale,
            upload_to_r2,
            account_id,
            access_key,
            secret_key,
            bucket
        ],
        outputs=[
            generated_image,
            output_json_component
        ]
    )

demo.queue().launch()