|
import gradio as gr |
|
from gradio_imageslider import ImageSlider |
|
import torch |
|
from diffusers import DiffusionPipeline, AutoencoderKL |
|
from PIL import Image |
|
from torchvision import transforms |
|
import tempfile |
|
import os |
|
import time |
|
import uuid |
|
|
|
# Opt-in low-memory switch: enabled only when the LOW_MEMORY environment
# variable is exactly "1". The flag is handed to the pipeline as `lowvram`
# at inference time.
LOW_MEMORY = os.environ.get("LOW_MEMORY", "0") == "1"

# Run on the GPU when one is visible to torch, otherwise on the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Weights are loaded in half precision regardless of the selected device.
dtype = torch.float16

# A separately published SDXL VAE is loaded and injected into the pipeline
# in place of the one bundled with the base checkpoint.
vae = AutoencoderKL.from_pretrained(
    "madebyollin/sdxl-vae-fp16-fix",
    torch_dtype=dtype,
)

# SDXL base weights driven by the DemoFusion custom pipeline script.
pipe = DiffusionPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",
    vae=vae,
    torch_dtype=dtype,
    variant="fp16",
    use_safetensors=True,
    custom_pipeline="pipeline_demofusion_sdxl.py",
    custom_revision="main",
).to(device)
|
|
|
|
|
def load_and_process_image(pil_image):
    """Convert a PIL image into a normalised half-precision batch tensor.

    The image is resized to 1024x1024, converted to a tensor, normalised
    with mean 0.5 and std 0.5 on each of three channels, given a leading
    batch dimension and cast to float16.

    Args:
        pil_image: The PIL image to convert. The three-channel
            normalisation presumably requires an RGB image; the caller in
            this file converts to RGB before calling.

    Returns:
        The processed tensor with a leading batch dimension of size 1.
    """
    preprocess = transforms.Compose(
        [
            transforms.Resize((1024, 1024)),
            transforms.ToTensor(),
            # (x - 0.5) / 0.5 per channel.
            transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
        ]
    )
    tensor = preprocess(pil_image)
    # Add the batch dimension first, then drop to half precision.
    batched = tensor.unsqueeze(0)
    return batched.half()
|
|
|
|
|
def pad_image(image):
    """Pad a PIL image to a square, centring it on a black canvas.

    The longer side is kept as-is and the shorter side is padded on both
    ends. When the difference is odd, the extra pixel of padding lands on
    the bottom/right because the offset is floor-divided.

    Args:
        image: A PIL image; its ``size`` and ``mode`` are read.

    Returns:
        ``image`` itself (same object) when it is already square,
        otherwise a new square image of side ``max(width, height)`` in the
        same mode, with the original pasted in the centre.
    """
    width, height = image.size
    if width == height:
        return image

    side = max(width, height)
    # NOTE(review): the fill colour is an RGB triple, so this presumably
    # expects an RGB-like mode -- confirm before passing grayscale images.
    canvas = Image.new(image.mode, (side, side), (0, 0, 0))
    # Exactly one of the two offsets is non-zero: the one along the
    # shorter dimension.
    canvas.paste(image, ((side - width) // 2, (side - height) // 2))
    return canvas
|
|
|
|
|
def predict(
    input_image,
    prompt,
    negative_prompt,
    seed,
    scale=2,
    progress=gr.Progress(track_tqdm=True),
):
    """Run the DemoFusion pipeline on an uploaded image and save the results.

    The input is padded to a square, resized to 1024x1024, converted to RGB
    and turned into a tensor that is passed to the pipeline as ``image_lr``.
    The pipeline is asked for a ``1024 * scale`` square output. Every image
    it returns is written as a JPEG into a fresh temporary directory.

    Args:
        input_image: The uploaded PIL image, or ``None`` if nothing was
            uploaded.
        prompt: Text prompt describing the image.
        negative_prompt: Text describing what to avoid.
        seed: Seed passed to ``torch.manual_seed``.
        scale: Multiplier applied to the 1024-pixel base size.
        progress: Gradio progress tracker; declared so Gradio mirrors tqdm
            progress in the UI. It is not referenced in the body.

    Returns:
        A pair ``((first_image, last_image), paths)`` where the first
        element feeds the before/after slider and ``paths`` lists the
        saved JPEG files, one per image returned by the pipeline.

    Raises:
        gr.Error: If ``input_image`` is ``None``.
    """
    if input_image is None:
        raise gr.Error("Please upload an image.")

    # A slider value may arrive as a float; the pipeline's width and height
    # must be whole pixel counts.
    scale = int(scale)
    target_size = 1024 * scale

    padded_image = pad_image(input_image).resize((1024, 1024)).convert("RGB")
    image_lr = load_and_process_image(padded_image).to(device)
    generator = torch.manual_seed(seed)

    last_time = time.time()
    images = pipe(
        prompt,
        negative_prompt=negative_prompt,
        image_lr=image_lr,
        width=target_size,
        height=target_size,
        view_batch_size=16,
        stride=64,
        generator=generator,
        num_inference_steps=25,
        guidance_scale=7.5,
        cosine_scale_1=3,
        cosine_scale_2=1,
        cosine_scale_3=1,
        sigma=0.8,
        multi_decoder=True,
        show_image=False,
        lowvram=LOW_MEMORY,
    )
    print(f"Time taken: {time.time() - last_time}")

    # The directory is not removed here: the returned paths must stay
    # readable after this function returns.
    images_path = tempfile.mkdtemp()
    uuid_name = uuid.uuid4()
    paths = []
    for img in images:
        # Files are told apart by image width, so each returned image is
        # presumably a different resolution.
        path = os.path.join(images_path, f"img_{uuid_name}_{img.size[0]}.jpg")
        img.save(path)
        paths.append(path)
    return (images[0], images[-1]), paths
|
|
|
|
|
# Narrow, centred column for the intro text.
css = """
#intro{
    max-width: 32rem;
    text-align: center;
    margin: 0 auto;
}
"""

with gr.Blocks(css=css) as demo:
    gr.Markdown(
        """
# Zoom and Enhance with DemoFusion SDXL

[DemoFusion](https://github.com/PRIS-CV/DemoFusion) enables higher-resolution image generation.
You can upload an initial image and prompt to generate an enhanced version.
[Duplicate Space](https://huggingface.co/spaces/radames/Enhance-This-DemoFusion-SDXL?duplicate=true) to avoid the queue.
<small>
*Note*: The author advises against the term "super resolution" because it's more like image-to-image generation than enhancement, but it's still a lot of fun!
</small>
""",
        elem_id="intro",
    )
    with gr.Row():
        # Left column: every user-controlled input plus the run button.
        with gr.Column(scale=1):
            image_input = gr.Image(type="pil", label="Input Image")
            prompt = gr.Textbox(
                label="Prompt",
                info="The prompt is very important to get the desired results. Please try to describe the image as best as you can.",
            )
            negative_prompt = gr.Textbox(
                label="Negative Prompt",
                value="blurry, ugly, duplicate, poorly drawn, deformed, mosaic",
            )
            # Displayed but not user-editable; values other than the
            # default only come from the examples below.
            scale = gr.Slider(
                minimum=1,
                maximum=5,
                value=2,
                step=1,
                label="x Scale",
                interactive=False,
            )
            seed = gr.Slider(
                minimum=0,
                maximum=2**64 - 1,
                value=1415926535897932,
                step=1,
                label="Seed",
                randomize=True,
            )
            btn = gr.Button()
        # Right column: before/after slider and downloadable files.
        with gr.Column(scale=2):
            image_slider = ImageSlider()
            files = gr.Files()

    # Order must match predict's positional parameters.
    inputs = [image_input, prompt, negative_prompt, seed, scale]
    outputs = [image_slider, files]
    btn.click(predict, inputs=inputs, outputs=outputs, concurrency_limit=1)

    # Each row supplies one value per entry in `inputs`, in the same order:
    # image path, prompt, negative prompt, seed, scale.
    gr.Examples(
        fn=predict,
        examples=[
            [
                "./examples/lara.jpeg",
                "photography of lara croft 8k high definition award winning",
                "blurry, ugly, duplicate, poorly drawn, deformed, mosaic",
                1528069323235669750,
                2,
            ],
            [
                "./examples/cybetruck.jpeg",
                "photo of tesla cybertruck futuristic car 8k high definition on a sand dune in mars, future",
                "blurry, ugly, duplicate, poorly drawn, deformed, mosaic",
                9257959681232992980,
                3,
            ],
            [
                "./examples/jesus.png",
                "a photorealistic painting of Jesus Christ, 4k high definition",
                "blurry, ugly, duplicate, poorly drawn, deformed, mosaic",
                13317204146129588000,
                2,
            ],
            [
                "./examples/anna-sullivan-DioLM8ViiO8-unsplash.jpg",
                # The comma after the prompt keeps it separate from the
                # negative prompt; adjacent string literals would be joined.
                "A crowded stadium with enthusiastic fans watching a daytime sporting event, the stands filled with colorful attire and the sun casting a warm glow",
                "blurry, ugly, duplicate, poorly drawn, deformed, mosaic",
                8398712905087378000,
                3,
            ],
        ],
        inputs=inputs,
        outputs=outputs,
        cache_examples=True,
    )

# Enable the request queue and start the server with the API surface hidden.
demo.queue(api_open=False)
demo.launch(show_api=False)
|
|