fast-image-server

Paused

File size: 4,098 Bytes

b13990e
65fd06d
 
 
 
 
b7fbd38
504fb8a
 
65fd06d
b13990e
65fd06d
 
 
7f11b82
65fd06d
b13990e
 
 
65fd06d
 
b13990e
cad2673
b13990e
 
 
 
 
65fd06d
 
 
 
 
 
 
 
b13990e
 
88cc598
 
0810225
 
88cc598
65fd06d
 
504fb8a
7db3ca3
 
 
 
 
 
 
504fb8a
 
 
 
 
 
 
 
65fd06d
108abb9
895e905
 
 
611d1ae
9aa1c3d
895e905
 
108abb9
 
 
 
 
 
 
 
 
 
 
 
504fb8a
65fd06d
108abb9
 
 
 
ec93dcb
108abb9
 
 
 
 
 
65fd06d
108abb9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b13990e
 
108abb9
b13990e
108abb9
 
b13990e
 
108abb9
b13990e
65fd06d
 
 
 
 
 
 
7db3ca3
 
7f11b82
65fd06d
 
 
 
 
 
 
8790f79
504fb8a


#!/usr/bin/env python

import base64
import hmac
import io
import os

import gradio as gr
import numpy as np
import PIL.Image
import torch
from diffusers import LCMScheduler, AutoPipelineForText2Image

# Upper bound of the seed slider: the largest signed 32-bit integer.
MAX_SEED = np.iinfo(np.int32).max
# Upper bound of the width/height sliders; overridable through the
# MAX_IMAGE_SIZE environment variable.
MAX_IMAGE_SIZE = int(os.environ.get('MAX_IMAGE_SIZE', '1024'))
# Shared secret every request must present; read from the SECRET_TOKEN
# environment variable.
# NOTE(review): when the variable is unset the fallback 'default_secret' is
# used, so deployments should always set SECRET_TOKEN explicitly.
SECRET_TOKEN = os.environ.get('SECRET_TOKEN', 'default_secret')

# Identifiers of the base model and of the LoRA adapter loaded on top of it.
MODEL_ID = "segmind/SSD-1B"
ADAPTER_ID = "latent-consistency/lcm-lora-ssd-1b"

device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
if device.type != 'cuda':
    # No GPU: the pipeline is not built and `pipe` stays unset.
    pipe = None
else:
    # Load the fp16 variant of the base model in half precision.
    pipe = AutoPipelineForText2Image.from_pretrained(
        MODEL_ID,
        torch_dtype=torch.float16,
        variant="fp16",
    )
    # Replace the default scheduler with an LCM scheduler built from the
    # same configuration.
    pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config)
    pipe.to("cuda")

    # Load the LoRA adapter weights, then fuse them into the pipeline.
    pipe.load_lora_weights(ADAPTER_ID)
    pipe.fuse_lora()

def generate(prompt: str,
             negative_prompt: str = '',
             seed: int = 0,
             width: int = 1024,
             height: int = 1024,
             guidance_scale: float = 0.0,
             num_inference_steps: int = 4,
             secret_token: str = '') -> str:
    """Generate one image for *prompt* and return it as a base64 PNG string.

    Args:
        prompt: Text description of the image to generate.
        negative_prompt: Text forwarded to the pipeline as negative prompt.
        seed: Seed for the random generator handed to the pipeline.
        width: Requested image width, forwarded to the pipeline.
        height: Requested image height, forwarded to the pipeline.
        guidance_scale: Guidance scale forwarded to the pipeline.
        num_inference_steps: Number of inference steps forwarded to the
            pipeline.
        secret_token: Token supplied by the caller; must equal the
            module-level ``SECRET_TOKEN``.

    Returns:
        The generated image, PNG-encoded and then base64-encoded, as a
        ``str`` (raw base64, no ``data:`` prefix).

    Raises:
        gr.Error: If ``secret_token`` does not match ``SECRET_TOKEN``, or if
            the module-level pipeline was not loaded (``pipe`` is ``None``).
    """
    # Compare in constant time so response timing does not leak how much of
    # the token matched. Both sides are encoded to bytes because
    # hmac.compare_digest rejects non-ASCII str arguments.
    token_ok = isinstance(secret_token, str) and hmac.compare_digest(
        secret_token.encode('utf-8'), SECRET_TOKEN.encode('utf-8'))
    if not token_ok:
        raise gr.Error(
            'Invalid secret token. Please fork the original space if you want to use it for yourself.')

    # `pipe` is None when the module was loaded without CUDA; fail with a
    # readable message instead of "'NoneType' object is not callable".
    if pipe is None:
        raise gr.Error(
            'The image generation pipeline is not loaded (no CUDA device available).')

    # Whole-number parameters may arrive as floats (e.g. from a JSON client);
    # manual_seed requires an int, so normalize them here.
    generator = torch.Generator().manual_seed(int(seed))

    image = pipe(prompt=prompt,
                 negative_prompt=negative_prompt,
                 width=int(width),
                 height=int(height),
                 guidance_scale=guidance_scale,
                 num_inference_steps=int(num_inference_steps),
                 generator=generator,
                 output_type='pil').images[0]

    # Serialize the PIL image to PNG bytes in memory.
    buffered = io.BytesIO()
    image.save(buffered, format="PNG")

    # Encode the PNG bytes to base64 text.
    return base64.b64encode(buffered.getvalue()).decode()

# Gradio app definition. The UI is deliberately hidden behind a full-screen
# notice: the space is meant to be called through its `run` API endpoint.
with gr.Blocks() as demo:
    # Fixed-position overlay covering the whole page with an explanatory text.
    gr.HTML("""
    <div style="z-index: 100; position: fixed; top: 0px; right: 0px; left: 0px; bottom: 0px; width: 100%; height: 100%; background: white; display: flex; align-items: center; justify-content: center; color: black;">
        <div style="text-align: center; color: black;">
            <p style="color: black;">This space is a REST API to programmatically generate images using LCM LoRA SSD-1B.</p>
            <p style="color: black;">It is not meant to be directly used through a user interface, but using code and an access key.</p>
        </div>
    </div>""")
    secret_token = gr.Text(
        label='Secret Token',
        max_lines=1,
        placeholder='Enter your secret token',
    )
    prompt = gr.Text(
        label='Prompt',
        show_label=False,
        max_lines=1,
        placeholder='Enter your prompt',
        container=False,
    )
    # `generate` returns a base64-encoded PNG string, so the output is a text
    # component. gr.Image only accepts type "numpy", "pil" or "filepath";
    # "base64" is not a valid value for it.
    result = gr.Text(label='Result', show_label=False)

    negative_prompt = gr.Text(
        label='Negative prompt',
        max_lines=1,
        placeholder='Enter a negative prompt',
        visible=True,
    )
    seed = gr.Slider(label='Seed',
                    minimum=0,
                    maximum=MAX_SEED,
                    step=1,
                    value=0)

    width = gr.Slider(
        label='Width',
        minimum=256,
        maximum=MAX_IMAGE_SIZE,
        step=32,
        value=1024,
    )
    height = gr.Slider(
        label='Height',
        minimum=256,
        maximum=MAX_IMAGE_SIZE,
        step=32,
        value=1024,
    )
    guidance_scale = gr.Slider(
        label='Guidance scale',
        minimum=0,
        maximum=2,
        step=0.1,
        value=0.0)
    num_inference_steps = gr.Slider(
        label='Number of inference steps',
        minimum=1,
        maximum=8,
        step=1,
        value=4)

    # Order must match the positional parameters of `generate`.
    inputs = [
        prompt,
        negative_prompt,
        seed,
        width,
        height,
        guidance_scale,
        num_inference_steps,
        secret_token,
    ]
    # Submitting the prompt runs `generate`; the same handler is exposed
    # under the API name 'run'.
    prompt.submit(
        fn=generate,
        inputs=inputs,
        outputs=result,
        api_name='run',
    )

# Enable request queuing (at most 32 pending requests) and start the server.
demo.queue(max_size=32).launch()