File size: 3,053 Bytes
2bed270
 
 
 
 
 
6a8f037
 
d829b71
6a8f037
 
2bed270
 
 
6a8f037
473b89a
b54826a
6a8f037
2bed270
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
acc3215
 
 
23e7420
acc3215
 
2bed270
 
acc3215
d829b71
32a2c12
2bed270
32a2c12
2bed270
32a2c12
acc3215
23e7420
 
d829b71
2bed270
 
 
a4efcf4
2bed270
6e89f05
 
acc3215
 
3280a05
4f693a4
2bed270
 
 
 
 
 
 
 
e8cc959
2bed270
d829b71
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
from typing import Dict, List, Any
from transformers import pipeline
import torch
import base64
from io import BytesIO
from PIL import Image
# from diffusers import StableDiffusionXLImg2ImgPipeline
# from diffusers.utils import load_image
import numpy as np
from diffusers import AutoPipelineForImage2Image
from diffusers.utils import load_image


class EndpointHandler():
    """Custom Hugging Face Inference Endpoints handler for SDXL-Turbo
    image-to-image generation (diffusers AutoPipelineForImage2Image)."""

    def __init__(self, path=""):
        # `path` is the conventional endpoint ctor argument (model directory);
        # the model id is fixed here and `path` is intentionally unused.
        # Load in fp16 and move to GPU — requires a CUDA device at startup.
        self.pipe = AutoPipelineForImage2Image.from_pretrained(
            "stabilityai/sdxl-turbo", torch_dtype=torch.float16, variant="fp16"
        )
        self.pipe.to("cuda")

    def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
        """Run one img2img generation.

        data args:
            inputs (:obj:`str`): text prompt.
            image (:obj:`str`, optional): base64-encoded input image.
            num_inference_steps / guidance_scale / negative_prompt / strength /
            denoising_start_step / denoising_end_step / num_images_per_prompt /
            aesthetic_score: optional hyperparameters, see diffusers docs.
        Return:
            The first generated PIL image (serialized by the endpoint runtime).
        """
        # get inputs
        # NOTE: falls back to the whole payload dict when "inputs" is absent —
        # kept as-is for backward compatibility with existing callers.
        inputs = data.pop("inputs", data)
        encoded_image = data.pop("image", None)

        # hyperparameters
        num_inference_steps = data.pop("num_inference_steps", 25)
        guidance_scale = data.pop("guidance_scale", 7.5)
        negative_prompt = data.pop("negative_prompt", None)

        strength = data.pop("strength", 0.7)
        # NOTE(review): diffusers expects denoising_start/denoising_end to be
        # None or floats strictly within (0, 1); the 0/1 defaults are kept
        # unchanged for compatibility — confirm against the deployed
        # diffusers version.
        denoising_start = data.pop("denoising_start_step", 0)
        denoising_end = data.pop("denoising_end_step", 1)
        num_images_per_prompt = data.pop("num_images_per_prompt", 1)
        # NOTE(review): SDXL's aesthetic_score is conventionally ~6.0; 0.6
        # looks like a typo but is preserved to avoid changing behavior.
        aesthetic_score = data.pop("aesthetic_score", 0.6)

        # process image
        if encoded_image is not None:
            image = self.decode_base64_image(encoded_image)
            print("Image is getting loaded")
        else:
            print("Image is None")
            image = None

        print(f"Prompt: {inputs}, strength: {strength}, inf steps: {num_inference_steps}, denoise start: {denoising_start}, denoise_end: {denoising_end}")
        print(f"Imgs per prompt: {num_images_per_prompt}, aesthetic_score: {aesthetic_score}, guidance_scale: {guidance_scale}, negative_prompt: {negative_prompt}")

        # run inference pipeline
        out = self.pipe(inputs,
                        image=image,
                        strength=strength,
                        num_inference_steps=num_inference_steps,
                        denoising_start=denoising_start,
                        denoising_end=denoising_end,
                        num_images_per_prompt=num_images_per_prompt,
                        aesthetic_score=aesthetic_score,
                        guidance_scale=guidance_scale,
                        negative_prompt=negative_prompt
        )

        # return first generated PIL image
        return out.images[0]

    # helper to decode input image
    def decode_base64_image(self, image_string):
        """Decode a base64-encoded image into an RGB PIL image.

        Normalizing to RGB fixes the previous PIL -> numpy -> PIL round-trip,
        which silently turned palette-mode ("P") images into grayscale and
        passed RGBA images through in a mode the SDXL pipeline rejects.
        """
        image_bytes = base64.b64decode(image_string)
        image = Image.open(BytesIO(image_bytes))
        return image.convert("RGB")