import gradio as gr
import torch
import argparse
from omegaconf import OmegaConf
from gligen.task_grounded_generation import grounded_generation_box, load_ckpt
from ldm.util import default_device
import json
import numpy as np
from PIL import Image, ImageDraw, ImageFont
from functools import partial
import math
from contextlib import nullcontext

from gradio import processing_utils
from typing import Optional

from huggingface_hub import hf_hub_download
hf_hub_download = partial(hf_hub_download, library_name="gligen_demo")

import openai
from gradio.components import Textbox, Text
import os

arg_bool = lambda x: x.lower() == 'true'

device = default_device()
print(f"GLIGEN uses {device.upper()} device.")
if device == "cpu": | |
print("It will be sloooow. Consider using GPU support with CUDA or (in case of M1/M2 Apple Silicon) MPS.") | |
elif device == "mps": | |
print("The fastest you can get on M1/2 Apple Silicon. Yet, still many opimizations are switched off and it will is much slower than CUDA.") | |
def parse_option():
    parser = argparse.ArgumentParser('GLIGen Demo', add_help=False)
    parser.add_argument("--folder", type=str, default="create_samples", help="path to OUTPUT")
    parser.add_argument("--official_ckpt", type=str, default='ckpts/sd-v1-4.ckpt', help="")
    parser.add_argument("--guidance_scale", type=float, default=5, help="")
    parser.add_argument("--alpha_scale", type=float, default=1, help="scale tanh(alpha). If 0, the behaviour is the same as the original model")
    parser.add_argument("--load-text-box-generation", type=arg_bool, default=True, help="Load text-box generation pipeline.")
    parser.add_argument("--load-text-box-inpainting", type=arg_bool, default=False, help="Load text-box inpainting pipeline.")
    parser.add_argument("--load-text-image-box-generation", type=arg_bool, default=False, help="Load text-image-box generation pipeline.")
    args = parser.parse_args()
    return args

args = parse_option()
def load_from_hf(repo_id, filename='diffusion_pytorch_model.bin'):
    cache_file = hf_hub_download(repo_id=repo_id, filename=filename)
    return torch.load(cache_file, map_location='cpu')

def load_ckpt_config_from_hf(modality):
    ckpt = load_from_hf(f'gligen/{modality}')
    config = load_from_hf('gligen/demo_config_legacy', filename=f'{modality}.pth')
    return ckpt, config
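# Checkpoints and their legacy training configs are pulled from the public `gligen/*` repos on the
# Hugging Face Hub and loaded onto CPU; the pipelines selected below then build the actual models.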
if args.load_text_box_generation:
    pretrained_ckpt_gligen, config = load_ckpt_config_from_hf('gligen-generation-text-box')
    config = OmegaConf.create(config["_content"])  # config used in training
    config.update(vars(args))
    config.model['params']['is_inpaint'] = False
    config.model['params']['is_style'] = False
    loaded_model_list = load_ckpt(config, pretrained_ckpt_gligen)


if args.load_text_box_inpainting:
    pretrained_ckpt_gligen_inpaint, config = load_ckpt_config_from_hf('gligen-inpainting-text-box')
    config = OmegaConf.create(config["_content"])  # config used in training
    config.update(vars(args))
    config.model['params']['is_inpaint'] = True
    config.model['params']['is_style'] = False
    loaded_model_list_inpaint = load_ckpt(config, pretrained_ckpt_gligen_inpaint)


if args.load_text_image_box_generation:
    pretrained_ckpt_gligen_style, config = load_ckpt_config_from_hf('gligen-generation-text-image-box')
    config = OmegaConf.create(config["_content"])  # config used in training
    config.update(vars(args))
    config.model['params']['is_inpaint'] = False
    config.model['params']['is_style'] = True
    loaded_model_list_style = load_ckpt(config, pretrained_ckpt_gligen_style)
def load_clip_model():
    from transformers import CLIPProcessor, CLIPModel
    version = "openai/clip-vit-large-patch14"
    model = CLIPModel.from_pretrained(version).to(device)
    processor = CLIPProcessor.from_pretrained(version)

    return {
        'version': version,
        'model': model,
        'processor': processor,
    }

clip_model = load_clip_model()
class ImageMask(gr.components.Image):
    """
    Sets: source="upload", tool="sketch"
    """

    is_template = True

    def __init__(self, **kwargs):
        super().__init__(source="upload", tool="sketch", interactive=True, **kwargs)

    def preprocess(self, x):
        if x is None:
            return x
        if self.tool == "sketch" and self.source in ["upload", "webcam"] and type(x) != dict:
            decode_image = processing_utils.decode_base64_to_image(x)
            width, height = decode_image.size
            mask = np.zeros((height, width, 4), dtype=np.uint8)
            mask[..., -1] = 255
            mask = self.postprocess(mask)
            x = {'image': x, 'mask': mask}
        return super().preprocess(x)
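# When a plain base64 image arrives instead of a sketch dict, preprocess() fabricates a blank
# (all-zero, opaque-alpha) mask so the rest of the app can always rely on the
# {'image': ..., 'mask': ...} layout produced by the sketch tool.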
class Blocks(gr.Blocks):

    def __init__(
        self,
        theme: str = "default",
        analytics_enabled: Optional[bool] = None,
        mode: str = "blocks",
        title: str = "Gradio",
        css: Optional[str] = None,
        **kwargs,
    ):
        self.extra_configs = {
            'thumbnail': kwargs.pop('thumbnail', ''),
            'url': kwargs.pop('url', 'https://gradio.app/'),
            'creator': kwargs.pop('creator', 'Jenny Sun'),
        }
        super(Blocks, self).__init__(theme, analytics_enabled, mode, title, css, **kwargs)

    def get_config_file(self):
        config = super(Blocks, self).get_config_file()

        for k, v in self.extra_configs.items():
            config[k] = v

        return config
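# This Blocks subclass only injects a few extra metadata fields (thumbnail, url, creator) into the
# config dictionary that Gradio serves to the frontend; everything else is stock gr.Blocks behaviour.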
'''
inference model
'''

def inference(task, language_instruction, grounding_instruction, inpainting_boxes_nodrop, image,
              alpha_sample, guidance_scale, batch_size,
              fix_seed, rand_seed, actual_mask, style_image,
              *args, **kwargs):
    grounding_instruction = json.loads(grounding_instruction)
    phrase_list, location_list = [], []
    for k, v in grounding_instruction.items():
        phrase_list.append(k)
        location_list.append(v)

    placeholder_image = Image.open('images/teddy.jpg').convert("RGB")
    image_list = [placeholder_image] * len(phrase_list)  # placeholder input for visual prompt, which is disabled

    batch_size = int(batch_size)
    if not 1 <= batch_size <= 4:
        batch_size = 2

    if style_image is None:
        has_text_mask = 1
        has_image_mask = 0  # then we hack above 'image_list'
    else:
        valid_phrase_len = len(phrase_list)

        phrase_list += ['placeholder']
        has_text_mask = [1] * valid_phrase_len + [0]

        image_list = [placeholder_image] * valid_phrase_len + [style_image]
        has_image_mask = [0] * valid_phrase_len + [1]

        location_list += [[0.0, 0.0, 1, 0.01]]  # style image grounding location

    if task == 'Grounded Inpainting':
        alpha_sample = 1.0

    instruction = dict(
        prompt=language_instruction,
        phrases=phrase_list,
        images=image_list,
        locations=location_list,
        alpha_type=[alpha_sample, 0, 1.0 - alpha_sample],
        has_text_mask=has_text_mask,
        has_image_mask=has_image_mask,
        save_folder_name=language_instruction,
        guidance_scale=guidance_scale,
        batch_size=batch_size,
        fix_seed=bool(fix_seed),
        rand_seed=int(rand_seed),
        actual_mask=actual_mask,
        inpainting_boxes_nodrop=inpainting_boxes_nodrop,
    )

    # float16 autocasting is only used on a CUDA device
    with torch.autocast(device_type='cuda', dtype=torch.float16) if device == "cuda" else nullcontext():
        if task == 'Grounded Generation':
            if style_image is None:
                return grounded_generation_box(loaded_model_list, instruction, *args, **kwargs)
            else:
                return grounded_generation_box(loaded_model_list_style, instruction, *args, **kwargs)
        elif task == 'Grounded Inpainting':
            assert image is not None
            instruction['input_image'] = image.convert("RGB")
            return grounded_generation_box(loaded_model_list_inpaint, instruction, *args, **kwargs)
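# The autocast wrapper above is a no-op (nullcontext) on CPU/MPS; float16 autocasting is applied
# only when running on CUDA, matching the device warning printed at startup.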
def draw_box(boxes=[], texts=[], img=None):
    if len(boxes) == 0 and img is None:
        return None

    if img is None:
        img = Image.new('RGB', (512, 512), (255, 255, 255))
    colors = ["red", "olive", "blue", "green", "orange", "brown", "cyan", "purple"]
    draw = ImageDraw.Draw(img)
    font = ImageFont.truetype("DejaVuSansMono.ttf", size=18)
    for bid, box in enumerate(boxes):
        draw.rectangle([box[0], box[1], box[2], box[3]], outline=colors[bid % len(colors)], width=4)
        anno_text = texts[bid]
        draw.rectangle([box[0], box[3] - int(font.size * 1.2), box[0] + int((len(anno_text) + 0.8) * font.size * 0.6), box[3]], outline=colors[bid % len(colors)], fill=colors[bid % len(colors)], width=4)
        draw.text([box[0] + int(font.size * 0.2), box[3] - int(font.size * 1.2)], anno_text, font=font, fill=(255, 255, 255))
    return img

def get_concat(ims):
    if len(ims) == 1:
        n_col = 1
    else:
        n_col = 2
    n_row = math.ceil(len(ims) / 2)
    dst = Image.new('RGB', (ims[0].width * n_col, ims[0].height * n_row), color="white")
    for i, im in enumerate(ims):
        row_id = i // n_col
        col_id = i % n_col
        dst.paste(im, (im.width * col_id, im.height * row_id))
    return dst

def auto_append_grounding(language_instruction, grounding_texts):
    for grounding_text in grounding_texts:
        if grounding_text not in language_instruction and grounding_text != 'auto':
            language_instruction += "; " + grounding_text
    print(language_instruction)
    return language_instruction
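# Example (illustrative): auto_append_grounding("a sunny park", ["dog", "duck"]) returns
# "a sunny park; dog; duck", since neither grounding phrase already appears in the caption.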
def generate(task, language_instruction, grounding_texts, sketch_pad,
             alpha_sample, guidance_scale, batch_size,
             fix_seed, rand_seed, use_actual_mask, append_grounding, style_cond_image,
             state):
    if 'boxes' not in state:
        state['boxes'] = []

    boxes = state['boxes']
    grounding_texts = [x.strip() for x in grounding_texts.split(';')]
    assert len(boxes) == len(grounding_texts)
    boxes = (np.asarray(boxes) / 512).tolist()
    grounding_instruction = json.dumps({obj: box for obj, box in zip(grounding_texts, boxes)})

    image = None
    actual_mask = None
    if task == 'Grounded Inpainting':
        image = state.get('original_image', sketch_pad['image']).copy()
        image = center_crop(image)
        image = Image.fromarray(image)

        if use_actual_mask:
            actual_mask = sketch_pad['mask'].copy()
            if actual_mask.ndim == 3:
                actual_mask = actual_mask[..., 0]
            actual_mask = center_crop(actual_mask, tgt_size=(64, 64))
            actual_mask = torch.from_numpy(actual_mask == 0).float()

        if state.get('inpaint_hw', None):
            boxes = np.asarray(boxes) * 0.9 + 0.05
            boxes = boxes.tolist()
            grounding_instruction = json.dumps({obj: box for obj, box in zip(grounding_texts, boxes) if obj != 'auto'})

    if append_grounding:
        language_instruction = auto_append_grounding(language_instruction, grounding_texts)

    gen_images, gen_overlays = inference(
        task, language_instruction, grounding_instruction, boxes, image,
        alpha_sample, guidance_scale, batch_size,
        fix_seed, rand_seed, actual_mask, style_cond_image, clip_model=clip_model,
    )

    for idx, gen_image in enumerate(gen_images):

        if task == 'Grounded Inpainting' and state.get('inpaint_hw', None):
            hw = min(*state['original_image'].shape[:2])
            gen_image = sized_center_fill(state['original_image'].copy(), np.array(gen_image.resize((hw, hw))), hw, hw)
            gen_image = Image.fromarray(gen_image)

        gen_images[idx] = gen_image

    blank_samples = batch_size % 2 if batch_size > 1 else 0
    gen_images = [gr.Image.update(value=x, visible=True) for i, x in enumerate(gen_images)] \
        + [gr.Image.update(value=None, visible=True) for _ in range(blank_samples)] \
        + [gr.Image.update(value=None, visible=False) for _ in range(4 - batch_size - blank_samples)]

    return gen_images + [state]
def binarize(x):
    return (x != 0).astype('uint8') * 255

def sized_center_crop(img, cropx, cropy):
    y, x = img.shape[:2]
    startx = x // 2 - (cropx // 2)
    starty = y // 2 - (cropy // 2)
    return img[starty:starty+cropy, startx:startx+cropx]

def sized_center_fill(img, fill, cropx, cropy):
    y, x = img.shape[:2]
    startx = x // 2 - (cropx // 2)
    starty = y // 2 - (cropy // 2)
    img[starty:starty+cropy, startx:startx+cropx] = fill
    return img

def sized_center_mask(img, cropx, cropy):
    y, x = img.shape[:2]
    startx = x // 2 - (cropx // 2)
    starty = y // 2 - (cropy // 2)
    center_region = img[starty:starty+cropy, startx:startx+cropx].copy()
    img = (img * 0.2).astype('uint8')
    img[starty:starty+cropy, startx:startx+cropx] = center_region
    return img

def center_crop(img, HW=None, tgt_size=(512, 512)):
    if HW is None:
        H, W = img.shape[:2]
        HW = min(H, W)
    img = sized_center_crop(img, HW, HW)
    img = Image.fromarray(img)
    img = img.resize(tgt_size)
    return np.array(img)
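# The helpers above operate on HxW(xC) numpy arrays: sized_center_crop/fill/mask cut, paste, or dim
# everything outside a centered square, and center_crop squares the image on its shorter side before
# resizing it to tgt_size (512x512 by default; 64x64 is used for the latent inpainting mask).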
def draw(task, input, grounding_texts, new_image_trigger, state):
    if type(input) == dict:
        image = input['image']
        mask = input['mask']
    else:
        mask = input

    if mask.ndim == 3:
        mask = mask[..., 0]

    image_scale = 1.0

    # resize trigger
    if task == "Grounded Inpainting":
        mask_cond = mask.sum() == 0
        # size_cond = mask.shape != (512, 512)
        if mask_cond and 'original_image' not in state:
            image = Image.fromarray(image)
            width, height = image.size
            scale = 600 / min(width, height)
            image = image.resize((int(width * scale), int(height * scale)))
            state['original_image'] = np.array(image).copy()
            image_scale = float(height / width)
            return [None, new_image_trigger + 1, image_scale, state]
        else:
            original_image = state['original_image']
            H, W = original_image.shape[:2]
            image_scale = float(H / W)

    mask = binarize(mask)
    if mask.shape != (512, 512):
        # assert False, "should not receive any non-512x512 masks."
        if 'original_image' in state and state['original_image'].shape[:2] == mask.shape:
            mask = center_crop(mask, state['inpaint_hw'])
            image = center_crop(state['original_image'], state['inpaint_hw'])
        else:
            mask = np.zeros((512, 512), dtype=np.uint8)
    # mask = center_crop(mask)
    mask = binarize(mask)

    if type(mask) != np.ndarray:
        mask = np.array(mask)

    if mask.sum() == 0 and task != "Grounded Inpainting":
        state = {}

    if task != 'Grounded Inpainting':
        image = None
    else:
        image = Image.fromarray(image)

    if 'boxes' not in state:
        state['boxes'] = []

    if 'masks' not in state or len(state['masks']) == 0:
        state['masks'] = []
        last_mask = np.zeros_like(mask)
    else:
        last_mask = state['masks'][-1]

    if type(mask) == np.ndarray and mask.size > 1:
        diff_mask = mask - last_mask
    else:
        diff_mask = np.zeros([])

    if diff_mask.sum() > 0:
        x1x2 = np.where(diff_mask.max(0) != 0)[0]
        y1y2 = np.where(diff_mask.max(1) != 0)[0]
        y1, y2 = y1y2.min(), y1y2.max()
        x1, x2 = x1x2.min(), x1x2.max()

        if (x2 - x1 > 5) and (y2 - y1 > 5):
            state['masks'].append(mask.copy())
            state['boxes'].append((x1, y1, x2, y2))

    grounding_texts = [x.strip() for x in grounding_texts.split(';')]
    grounding_texts = [x for x in grounding_texts if len(x) > 0]
    if len(grounding_texts) < len(state['boxes']):
        grounding_texts += [f'Obj. {bid+1}' for bid in range(len(grounding_texts), len(state['boxes']))]

    box_image = draw_box(state['boxes'], grounding_texts, image)

    if box_image is not None and state.get('inpaint_hw', None):
        inpaint_hw = state['inpaint_hw']
        box_image_resize = np.array(box_image.resize((inpaint_hw, inpaint_hw)))
        original_image = state['original_image'].copy()
        box_image = sized_center_fill(original_image, box_image_resize, inpaint_hw, inpaint_hw)

    return [box_image, new_image_trigger, image_scale, state]
def clear(task, sketch_pad_trigger, batch_size, state, switch_task=False):
    if task != 'Grounded Inpainting':
        sketch_pad_trigger = sketch_pad_trigger + 1
    blank_samples = batch_size % 2 if batch_size > 1 else 0
    out_images = [gr.Image.update(value=None, visible=True) for i in range(batch_size)] \
        + [gr.Image.update(value=None, visible=True) for _ in range(blank_samples)] \
        + [gr.Image.update(value=None, visible=False) for _ in range(4 - batch_size - blank_samples)]
    state = {}
    return [None, sketch_pad_trigger, None, 1.0] + out_images + [state]
css = """ | |
#generate-btn { | |
--tw-border-opacity: 1; | |
border-color: rgb(255 216 180 / var(--tw-border-opacity)); | |
--tw-gradient-from: rgb(255 216 180 / .7); | |
--tw-gradient-to: rgb(255 216 180 / 0); | |
--tw-gradient-stops: var(--tw-gradient-from), var(--tw-gradient-to); | |
--tw-gradient-to: rgb(255 176 102 / .8); | |
--tw-text-opacity: 1; | |
color: rgb(238 116 0 / var(--tw-text-opacity)); | |
} | |
#img2img_image, #img2img_image > .h-60, #img2img_image > .h-60 > div, #img2img_image > .h-60 > div > img | |
{ | |
height: var(--height) !important; | |
max-height: var(--height) !important; | |
min-height: var(--height) !important; | |
} | |
#mirrors a:hover { | |
cursor:pointer; | |
} | |
#paper-info a { | |
color:#008AD7; | |
} | |
#paper-info a:hover { | |
cursor: pointer; | |
} | |
""" | |
rescale_js = """ | |
function(x) { | |
const root = document.querySelector('gradio-app').shadowRoot || document.querySelector('gradio-app'); | |
let image_scale = parseFloat(root.querySelector('#image_scale input').value) || 1.0; | |
const image_width = root.querySelector('#img2img_image').clientWidth; | |
const target_height = parseInt(image_width * image_scale); | |
document.body.style.setProperty('--height', `${target_height}px`); | |
root.querySelectorAll('button.justify-center.rounded')[0].style.display='none'; | |
root.querySelectorAll('button.justify-center.rounded')[1].style.display='none'; | |
return x; | |
} | |
""" | |
mirror_js = """ | |
function () { | |
const root = document.querySelector('gradio-app').shadowRoot || document.querySelector('gradio-app'); | |
const mirrors_div = root.querySelector('#mirrors'); | |
const current_url = window.location.href; | |
const mirrors = [ | |
'https://dev.hliu.cc/gligen_mirror1/', | |
'https://dev.hliu.cc/gligen_mirror2/', | |
]; | |
let mirror_html = ''; | |
mirror_html += '[<a href="https://gligen.github.io" target="_blank" style="">Project Page</a>]'; | |
mirror_html += '[<a href="https://arxiv.org/abs/2301.07093" target="_blank" style="">Paper</a>]'; | |
mirror_html += '[<a href="https://github.com/gligen/GLIGEN" target="_blank" style="">GitHub Repo</a>]'; | |
mirror_html += ' | '; | |
mirror_html += 'Mirrors: '; | |
mirrors.forEach((e, index) => { | |
let cur_index = index + 1; | |
if (current_url.includes(e)) { | |
mirror_html += `[Mirror ${cur_index}] `; | |
} else { | |
mirror_html += `[<a onclick="window.location.href = '${e}'">Mirror ${cur_index}</a>] `; | |
} | |
}); | |
mirror_html = `<div class="output-markdown gr-prose" style="max-width: 100%;"><h3 style="text-align: center" id="paper-info">${mirror_html}</h3></div>`; | |
mirrors_div.innerHTML = mirror_html; | |
} | |
""" | |
# Set up OpenAI API key
openai.api_key = os.environ['OPENAI_API_KEY']

prompt_base = 'Separate the subjects in this sentence by semicolons. For example, the sentence "a tiger and a horse running in a greenland" should output "tiger; horse". If there are numbers, make each subject unique. For example, "2 dogs and 1 duck" would be "dog; dog; duck." Do the same for the following sentence: \n'

original_input = ""
separated_subjects = ""

language_instruction = gr.Textbox(
    label="Language Instruction by User",
    value="2 horses running",
    visible=False
)
grounding_instruction = gr.Textbox(
    label="Subjects in image (Separated by semicolon)",
    value="horse; horse",
    visible=False
)

def separate_subjects(input_text):
    prompt = prompt_base + input_text
    response = openai.Completion.create(
        engine="text-davinci-002",
        prompt=prompt,
        max_tokens=1024,
        n=1,
        stop=None,
        temperature=0.7,
    )
    output_text = response.choices[0].text.strip()
    return output_text
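# Illustrative expectation, not a guaranteed output: for the example prompt "A cowboy and ninja fighting",
# separate_subjects() should return something like "cowboy; ninja", which then serves as the
# semicolon-separated grounding instruction.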
# def update_original_input():
#     print("start update_original_input")
#     global original_input
#     original_input = language_instruction.value
#     print("original_input in update:", original_input)

# def update_grounding_instruction():
#     print("start update_grounding_instruction")
#     # global original_input  # declare you want to use the outer variable
#     global separated_subjects
#     update_original_input()
#     separated_subjects = separate_subjects(language_instruction.value)
#     # separated_subjects = separate_subjects(original_input)
#     grounding_instruction.value = separated_subjects
#     print("original_input:", original_input)
#     print("separated_subjects", separated_subjects)
with Blocks(
    css=css,
    analytics_enabled=False,
    title="GLIGen demo",
) as main:
    gr.Markdown('<h1 style="text-align: center;">MSR: MultiSubject Render</h1>')
    gr.Markdown('<h3 style="text-align: center;">Using NLP and grounding processing techniques to improve image generation of multiple subjects with the base Stable Diffusion model</h3>')

    with gr.Row():
        with gr.Column(scale=4):
            sketch_pad_trigger = gr.Number(value=0, visible=False)
            sketch_pad_resize_trigger = gr.Number(value=0, visible=False)
            init_white_trigger = gr.Number(value=0, visible=False)
            image_scale = gr.Number(value=0, elem_id="image_scale", visible=False)
            new_image_trigger = gr.Number(value=0, visible=False)

            # UNCOMMENT THIS WHEN YOU WANT TO TOGGLE THE INPAINTING OPTION
            task = gr.Radio(
                choices=["Grounded Generation", 'Grounded Inpainting'],
                type="value",
                value="Grounded Generation",
                label="Task",
            )

            # language_instruction = gr.Textbox(
            #     label="Enter your prompt here",
            # )
            # grounding_instruction = gr.Textbox(
            #     label="Grounding instruction (Separated by semicolon)",
            # )
            # grounding_instruction = separate_subjects(language_instruction.value)
            # print(f"The user entered: {language_instruction}")
            # print(f"Our function gave: {grounding_instruction}")

            # EXPERIMENTING:
            with gr.Column():
                seed = gr.Text(label="Enter your prompt here:")
                gr.Examples(["2 horses running", "A cowboy and ninja fighting", "An apple and an orange on a table"], inputs=[seed])
            with gr.Column():
                btn = gr.Button("Gen")
            with gr.Column():
                separated_text = gr.Text(label="Subjects Separated by Semicolon")
            btn.click(separate_subjects, inputs=[seed], outputs=[separated_text])

            language_instruction.value = seed
            grounding_instruction.value = separated_text
            ####################

            # language_instruction = gr.Textbox(
            #     label="Enter your prompt here",
            # )
            # original_input = language_instruction.value
            # start_btn = gr.Button('Start')
            # start_btn.click(update_grounding_instruction)
            # print("separated subjects 2:", separated_subjects)

            # language_instruction = gr.Textbox(
            #     label="just needs to be here",
            #     value=seed,
            #     visible=False
            # )
            # grounding_instruction = gr.Textbox(
            #     label="Subjects in image (Separated by semicolon)",
            #     value=separated_text,
            #     visible=False
            # )

            print("Language instruction:", language_instruction.value)
            print("Grounding instruction:", grounding_instruction.value)
            ####################
            with gr.Row():
                sketch_pad = ImageMask(label="Sketch Pad", elem_id="img2img_image")
                out_imagebox = gr.Image(type="pil", label="Parsed Sketch Pad")
            with gr.Row():
                clear_btn = gr.Button(value='Clear')
                gen_btn = gr.Button(value='Generate', elem_id="generate-btn")

            with gr.Accordion("Advanced Options", open=False):
                with gr.Column():
                    alpha_sample = gr.Slider(minimum=0, maximum=1.0, step=0.1, value=0.3, label="Scheduled Sampling (τ)", visible=False)
                    guidance_scale = gr.Slider(minimum=0, maximum=50, step=0.5, value=20, label="Guidance Scale (how closely it adheres to your prompt)")
                    batch_size = gr.Slider(minimum=1, maximum=4, step=1, value=4, label="Number of Images")
                    append_grounding = gr.Checkbox(value=True, label="Append grounding instructions to the caption", visible=False)
                    use_actual_mask = gr.Checkbox(value=False, label="Use actual mask for inpainting", visible=False)
                    with gr.Row():
                        fix_seed = gr.Checkbox(value=False, label="Fixed seed", visible=False)
                        rand_seed = gr.Slider(minimum=0, maximum=1000, step=1, value=0, label="Seed", visible=False)
                    with gr.Row():
                        use_style_cond = gr.Checkbox(value=False, label="Enable Style Condition", visible=False)
                        style_cond_image = gr.Image(type="pil", label="Style Condition", interactive=True, visible=False)

        with gr.Column(scale=4):
            gr.Markdown("### Generated Images")
            with gr.Row():
                out_gen_1 = gr.Image(type="pil", visible=True, show_label=False)
                out_gen_2 = gr.Image(type="pil", visible=True, show_label=False)
            with gr.Row():
                out_gen_3 = gr.Image(type="pil", visible=True, show_label=False)
                out_gen_4 = gr.Image(type="pil", visible=True, show_label=False)
        state = gr.State({})

    class Controller:
        def __init__(self):
            self.calls = 0
            self.tracks = 0
            self.resizes = 0
            self.scales = 0

        def init_white(self, init_white_trigger):
            self.calls += 1
            return np.ones((512, 512), dtype='uint8') * 255, 1.0, init_white_trigger + 1

        def change_n_samples(self, n_samples):
            blank_samples = n_samples % 2 if n_samples > 1 else 0
            return [gr.Image.update(visible=True) for _ in range(n_samples + blank_samples)] \
                + [gr.Image.update(visible=False) for _ in range(4 - n_samples - blank_samples)]

        def resize_centercrop(self, state):
            self.resizes += 1
            image = state['original_image'].copy()
            inpaint_hw = int(0.9 * min(*image.shape[:2]))
            state['inpaint_hw'] = inpaint_hw
            image_cc = center_crop(image, inpaint_hw)
            # print(f'resize triggered {self.resizes}', image.shape, '->', image_cc.shape)
            return image_cc, state

        def resize_masked(self, state):
            self.resizes += 1
            image = state['original_image'].copy()
            inpaint_hw = int(0.9 * min(*image.shape[:2]))
            state['inpaint_hw'] = inpaint_hw
            image_mask = sized_center_mask(image, inpaint_hw, inpaint_hw)
            state['masked_image'] = image_mask.copy()
            # print(f'mask triggered {self.resizes}')
            return image_mask, state

        def switch_task_hide_cond(self, task):
            cond = False
            if task == "Grounded Generation":
                cond = True
            return gr.Checkbox.update(visible=cond, value=False), gr.Image.update(value=None, visible=False), gr.Slider.update(visible=cond), gr.Checkbox.update(visible=(not cond), value=False)
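    # switch_task_hide_cond() re-targets the advanced widgets when the task changes: the style-condition
    # checkbox and the scheduled-sampling slider are shown only for "Grounded Generation", the style image
    # is cleared and hidden, and the "use actual mask" checkbox is shown only for "Grounded Inpainting".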
    controller = Controller()
    main.load(
        lambda x: x + 1,
        inputs=sketch_pad_trigger,
        outputs=sketch_pad_trigger,
        queue=False)
    sketch_pad.edit(
        draw,
        inputs=[task, sketch_pad, grounding_instruction, sketch_pad_resize_trigger, state],
        outputs=[out_imagebox, sketch_pad_resize_trigger, image_scale, state],
        queue=False,
    )
    grounding_instruction.change(
        draw,
        inputs=[task, sketch_pad, grounding_instruction, sketch_pad_resize_trigger, state],
        outputs=[out_imagebox, sketch_pad_resize_trigger, image_scale, state],
        queue=False,
    )
    clear_btn.click(
        clear,
        inputs=[task, sketch_pad_trigger, batch_size, state],
        outputs=[sketch_pad, sketch_pad_trigger, out_imagebox, image_scale, out_gen_1, out_gen_2, out_gen_3, out_gen_4, state],
        queue=False)
    task.change(
        partial(clear, switch_task=True),
        inputs=[task, sketch_pad_trigger, batch_size, state],
        outputs=[sketch_pad, sketch_pad_trigger, out_imagebox, image_scale, out_gen_1, out_gen_2, out_gen_3, out_gen_4, state],
        queue=False)
    sketch_pad_trigger.change(
        controller.init_white,
        inputs=[init_white_trigger],
        outputs=[sketch_pad, image_scale, init_white_trigger],
        queue=False)
    sketch_pad_resize_trigger.change(
        controller.resize_masked,
        inputs=[state],
        outputs=[sketch_pad, state],
        queue=False)
    batch_size.change(
        controller.change_n_samples,
        inputs=[batch_size],
        outputs=[out_gen_1, out_gen_2, out_gen_3, out_gen_4],
        queue=False)
    gen_btn.click(
        generate,
        inputs=[
            task, language_instruction, grounding_instruction, sketch_pad,
            alpha_sample, guidance_scale, batch_size,
            fix_seed, rand_seed,
            use_actual_mask,
            append_grounding, style_cond_image,
            state,
        ],
        outputs=[out_gen_1, out_gen_2, out_gen_3, out_gen_4, state],
        queue=True
    )
    # start_btn.click(
    #     update_grounding_instruction,
    #     # inputs=[
    #     #     original_input,
    #     # ],
    #     # outputs=[separated_subjects],
    #     # queue=True
    # )
    sketch_pad_resize_trigger.change(
        None,
        None,
        sketch_pad_resize_trigger,
        _js=rescale_js,
        queue=False)
    init_white_trigger.change(
        None,
        None,
        init_white_trigger,
        _js=rescale_js,
        queue=False)
    use_style_cond.change(
        lambda cond: gr.Image.update(visible=cond),
        use_style_cond,
        style_cond_image,
        queue=False)
    task.change(
        controller.switch_task_hide_cond,
        inputs=task,
        outputs=[use_style_cond, style_cond_image, alpha_sample, use_actual_mask],
        queue=False)

main.queue(concurrency_count=1, api_open=False)
main.launch(share=False, show_api=False)
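# Note: running this demo locally assumes the GLIGEN/LDM code tree (gligen/, ldm/, images/teddy.jpg)
# is importable and that the OPENAI_API_KEY environment variable is set, since separate_subjects()
# calls the OpenAI Completion API when the "Gen" button is clicked.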