Fixed live demo app, converted network for ONNX conversion, fixed code
- .gitignore +18 -0
- app.py +4 -49
- config/test.yaml +3 -2
- converter.py +35 -0
- live_demo.py +71 -0
- live_mp.py +112 -0
- output.mp4 +0 -0
- requirements.txt +3 -1
- tools/__init__.py +1 -1
- tools/engine.py +12 -0
- tools/inference.py +48 -9
- tools/model.py +14 -5
- tools/stylematte.py +1 -21
- tools/util.py +49 -6
.gitignore
CHANGED
@@ -173,3 +173,21 @@ __pycache__/*
 flagged/
 # assets/
 .DS_store
+
+
+config/*
+trainer/__pycache__/
+trainer/__pycache__/*
+__pycache__/*
+checkpoints/*.pth
+*/*.pth
+*/checkpoints/best_pure.pth
+checkpoints/best_pure.pth
+*.ipynb
+.ipynb_checkpoints/*
+flagged/
+assets/*
+*.html
+checkpoints/*.onnx
+*.avi
+*.onnx
app.py
CHANGED
@@ -1,5 +1,5 @@
 import gradio as gr
-from tools import Inference, Matting, log
+from tools import Inference, Matting, log, extract_matte, harmonize, css
 from omegaconf import OmegaConf
 import os
 import sys
@@ -9,56 +9,11 @@ from PIL import Image
 
 args = OmegaConf.load(os.path.join(f"./config/test.yaml"))
 
-global_comp = None
-global_mask = None
-
 log("Model loading")
 phnet = Inference(**args)
 stylematte = Matting(**args)
 log("Model loaded")
 
-
-def harmonize(comp, mask):
-    log("Inference started")
-    if comp is None or mask is None:
-        log("Empty source")
-        return np.zeros((16, 16, 3))
-
-    comp = comp.convert('RGB')
-    mask = mask.convert('1')
-    in_shape = comp.size[::-1]
-
-    comp = tf.resize(comp, [args.image_size, args.image_size])
-    mask = tf.resize(mask, [args.image_size, args.image_size])
-
-    compt = tf.to_tensor(comp)
-    maskt = tf.to_tensor(mask)
-    res = phnet.harmonize(compt, maskt)
-    res = tf.resize(res, in_shape)
-
-    log("Inference finished")
-
-    return np.uint8((res*255)[0].permute(1, 2, 0).numpy())
-
-
-def extract_matte(img, back):
-    mask, fg = stylematte.extract(img)
-    fg_pil = Image.fromarray(np.uint8(fg))
-
-    composite = fg + (1 - mask[:, :, None]) * \
-        np.array(back.resize(mask.shape[::-1]))
-    composite_pil = Image.fromarray(np.uint8(composite))
-
-    global_comp = composite_pil
-    global_mask = mask
-
-    return [composite_pil, mask, fg_pil]
-
-
-def css(height=3, scale=2):
-    return f".output_image {{height: {height}rem !important; width: {scale}rem !important;}}"
-
-
 with gr.Blocks() as demo:
     gr.Markdown(
         """
@@ -97,11 +52,11 @@ with gr.Blocks() as demo:
     harmonized_ui = gr.Image(
         type="pil", label='Harmonized composite', css=css(3, 3))
 
-    btn_compose.click(extract_matte, inputs=[input_ui, back_ui], outputs=[
+    btn_compose.click(lambda x, y: extract_matte(x, y, stylematte), inputs=[input_ui, back_ui], outputs=[
                       composite_ui, matte_ui, fg_ui])
-    btn_harmonize.click(harmonize, inputs=[
+    btn_harmonize.click(lambda x, y: harmonize(x, y, phnet), inputs=[
                         composite_ui, matte_ui], outputs=[harmonized_ui])
 
 
 log("Interface created")
-demo.launch(share=
+demo.launch(share=False)
config/test.yaml
CHANGED
@@ -5,7 +5,8 @@ init_value: 0.8
 skips: True
 device: 'cpu'
 checkpoint:
-
+  matting_onnx: "checkpoints/stylematte_720.onnx"
+  harmonizer: "checkpoints/ffhqh1024.pth"
   matting: "checkpoints/stylematte.pth"
-
+onnx: False
 image_size: 1024
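
Aside (not part of the commit): a minimal sketch of how these new keys are read via OmegaConf, assuming the config above.

from omegaconf import OmegaConf

args = OmegaConf.load("./config/test.yaml")
# onnx toggles which matting checkpoint gets used; both paths come from the YAML above
ckpt = args.checkpoint.matting_onnx if args.onnx else args.checkpoint.matting
print(args.onnx, ckpt)  # False checkpoints/stylematte.pth
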
converter.py
ADDED
@@ -0,0 +1,35 @@
+import torchvision
+import io
+import numpy as np
+import torch.onnx
+import onnx
+from tools import Inference, Matting, log, extract_matte, harmonize, css, execute_onnx_model
+from omegaconf import OmegaConf
+import os
+import sys
+import torch
+import numpy as np
+import torchvision.transforms.functional as tf
+from PIL import Image
+import cv2 as cv
+from onnxruntime import InferenceSession
+
+args = OmegaConf.load(os.path.join(f"./config/test.yaml"))
+
+log("Model loading")
+phnet = Inference(**args)
+stylematte = Matting(**args)
+log("Model loaded")
+model = stylematte.model
+
+x = torch.randn((1, 3, 720, 1280))
+mask = torch.ones((1, 1, 512, 512))
+path = 'checkpoints/stylematte-test.onnx'
+
+# Export
+torch.onnx.export(model, x, path, opset_version=16)
+
+# Validation
+onnx_model = onnx.load(path)
+onnx.checker.check_model(onnx_model)
+# execute_onnx_model(x, onnx_model)
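
A quick numerical check of the export (not in this commit; a sketch assuming the model, x, and path defined in converter.py above, and that the exported graph has a single input and output):

import numpy as np
import torch
from onnxruntime import InferenceSession

sess = InferenceSession(path, providers=['CPUExecutionProvider'])
input_name = sess.get_inputs()[0].name  # avoids hard-coding the exported input name
with torch.no_grad():
    ref = model(x).cpu().numpy()        # PyTorch reference output
out = sess.run(None, {input_name: x.numpy().astype(np.float32)})[0]
print("max abs diff:", np.abs(ref - out).max())
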
live_demo.py
ADDED
@@ -0,0 +1,71 @@
+import gradio as gr
+from tools import Inference, Matting, log, extract_matte, harmonize, css, live_matting_step
+from omegaconf import OmegaConf
+import os
+import sys
+import numpy as np
+import torchvision.transforms.functional as tf
+from PIL import Image
+import cv2 as cv
+import time
+import asyncio
+
+args = OmegaConf.load(os.path.join(f"./config/test.yaml"))
+
+log("Model loading")
+phnet = Inference(**args)
+stylematte = Matting(**args)
+log("Model loaded")
+
+
+async def show(queue):
+    while True:
+        log("SHOW FRAME")
+        frame = queue.get()
+        cv.imshow('Video', frame)
+        await asyncio.sleep(0.01)
+
+
+async def main(queue):
+    video = cv.VideoCapture(0)
+    fps = 10
+    counter = 0
+    frame_count = 0
+    if not video.isOpened():
+        raise Exception('Video is not opened!')
+    begin = time.time()
+    for i in range(300):
+        counter += 1
+        frame_count += 1
+        ret, frame = video.read()  # Capture frame-by-frame
+        inp = np.array(frame)
+        back = np.zeros_like(frame)
+        queue.put(inp)
+        # res = asyncio.ensure_future(
+        #     live_matting_step(inp, back, stylematte))
+        # res = await live_matting_step(inp, back, stylematte)
+        log(f"{i} await")
+        # Display the resulting frame
+
+        # blurred_frame = cv.blur(frame, (10, 10))
+        end = time.time()
+        log(f"frames: {frame_count}, time: {end - begin}, fps: {frame_count/(end - begin) }")
+
+        if cv.waitKey(1) & 0xFF == ord('q'):
+            break
+
+    end = time.time()
+    log(f"OVERALL TIME CONSUMED: {end - begin}, frames: {frame_count}, fps: {frame_count/(end - begin) }")
+    # release the capture
+    video.release()
+    cv.destroyAllWindows()
+
+
+if __name__ == "__main__":
+    queue = asyncio.Queue()
+    loop = asyncio.get_event_loop()
+    # asyncio.ensure_future(show(frame))  # Display the resulting frame
+
+    loop.run_until_complete(main(queue))
+    loop.run_until_complete(show(queue))
+    loop.run_forever()
live_mp.py
ADDED
@@ -0,0 +1,112 @@
+from multiprocessing import Process, Queue
+import gradio as gr
+from tools import Inference, Matting, log, extract_matte, harmonize, css, live_matting_step
+from omegaconf import OmegaConf
+import os
+import sys
+import numpy as np
+import torchvision.transforms.functional as tf
+from PIL import Image
+import cv2 as cv
+import time
+import asyncio
+
+
+def show(queue, stack):
+    print(f"PROCESS {3}")
+    # while not queue.empty():
+    if stack.empty():
+        frame = queue.get()
+    else:
+        frame = stack.get(block=False)
+    cv.imshow('Video', np.uint8(frame))
+    log("PID: 3, SHOW FRAME")
+    print(frame.shape)
+    time.sleep(0.1)
+
+
+def extract(queue, stack, model):
+    '''
+    img: np.array,
+    back: np.array,
+    model: Matting instance
+    '''
+    print(f"PROCESS {2}")
+    img = queue.get()
+    back = np.zeros_like(img)
+    mask, fg = model.extract(img)
+    composite = fg + (1 - mask[:, :, None]) * \
+        back  # .resize(mask.shape[::-1])
+    stack.put(np.uint8(composite))
+    # time.sleep(0.1)
+    print("PID: 2, LIVE STEP")
+    # for i in range(10):
+    #     print(f"In live {i}")
+    # cv.imshow('Video', np.uint8(composite))
+    # return composite
+
+
+def main(queue):
+
+    log(f"PROCESS {1}")
+    video = cv.VideoCapture(0)
+    fps = 10
+    counter = 0
+    frame_count = 0
+    if not video.isOpened():
+        raise Exception('Video is not opened!')
+    begin = time.time()
+    # stack = Queue()
+    for i in range(10):
+        counter += 1
+        frame_count += 1
+        ret, frame = video.read()  # Capture frame-by-frame
+        inp = np.array(frame)
+        back = np.zeros_like(frame)
+        # res = asyncio.ensure_future(
+        #     live_matting_step(inp, back, stylematte))
+        # res = live_matting_step(inp, back, stylematte)
+        queue.put(inp)
+        mp.sleep(0.1)
+        # Display the resulting frame
+
+        # blurred_frame = cv.blur(frame, (10, 10))
+        counter = 0
+        end = time.time()
+        log(f"PID: 1, frames: {frame_count}, time: {end - begin}, fps: {frame_count/(end - begin) }")
+        # else:
+        #     show(queue)  # Display the resulting frame
+
+        if cv.waitKey(1) & 0xFF == ord('q'):
+            break
+    end = time.time()
+    log(f"OVERALL TIME CONSUMED: {end - begin}, frames: {frame_count}, fps: {frame_count/(end - begin) }")
+    # release the capture
+    video.release()
+    cv.destroyAllWindows()
+
+
+if __name__ == "__main__":
+    queue = Queue()  # Create the channel
+    stack = Queue()  # Create the channel
+    # stack = Queue()  # Create the channel
+    args = OmegaConf.load(os.path.join(f"./config/test.yaml"))
+
+    log("Model loading")
+    phnet = Inference(**args)
+    stylematte = Matting(**args)
+    log("Model loaded")
+
+    p1 = Process(target=main, args=(queue,))  # Pass in the parameters
+
+    p2 = Process(target=extract, args=(
+        queue, stack, stylematte))  # Pass in the parameters
+    p3 = Process(target=show, args=(queue, stack))  # Pass in the parameters
+    # p2 = Process(target=test_2, args=("Пончик", queue,))  # Pass in the parameters
+
+    p1.start()
+    p2.start()
+    p3.start()
+    p3.join()
+    p2.join()
+    p1.join()
output.mp4
ADDED
Binary file (258 Bytes).
requirements.txt
CHANGED
@@ -35,4 +35,6 @@ torchaudio==0.11.0
 torchvision==0.12.0
 tornado==6.2
 tqdm==4.64.1
-transformers==4.28.1
+transformers==4.28.1
+onnx==1.14.1
+onnxruntime==1.16.0
tools/__init__.py
CHANGED
@@ -1,3 +1,3 @@
 from .inference import Inference
 from .inference import Matting
-from .util import
+from .util import *
tools/engine.py
ADDED
@@ -0,0 +1,12 @@
+import onnx
+from onnxruntime import InferenceSession
+import numpy as np
+import torch
+
+
+def execute_onnx_model(x, onnx_model) -> None:
+    sess = InferenceSession(onnx_model.SerializeToString(), providers=[
+        'AzureExecutionProvider', 'CPUExecutionProvider'])
+    out = sess.run(None, {'input.1': x.numpy().astype(np.float32)})[0]
+
+    return out
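
For context, a minimal call sequence (a sketch; the checkpoint path comes from config/test.yaml, and the 'input.1' key inside execute_onnx_model assumes the default input name that torch.onnx.export assigned during export):

import onnx
import torch
from tools.engine import execute_onnx_model

onnx_model = onnx.load("checkpoints/stylematte_720.onnx")  # path from config/test.yaml
dummy = torch.randn(1, 3, 720, 1280)                       # NCHW input, as in converter.py
alpha = execute_onnx_model(dummy, onnx_model)              # numpy array from the first graph output
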
tools/inference.py
CHANGED
@@ -4,6 +4,11 @@ import torchvision.transforms.functional as tf
 from .util import inference_img, log
 from .stylematte import StyleMatte
 import numpy as np
+import onnx
+from .engine import execute_onnx_model
+import cv2
+from torchvision import transforms
+import time
 
 
 class Inference:
@@ -15,7 +20,6 @@ class Inference:
             grid_count=self.grid_counts,
             init_weights=self.init_weights,
             init_value=self.init_value)
-        log(f"checkpoint: {self.checkpoint.harmonizer}")
         state = torch.load(self.checkpoint.harmonizer,
                            map_location=self.device)
 
@@ -29,12 +33,13 @@ class Inference:
         mask = mask.unsqueeze(0)
         composite = tf.resize(composite, [self.image_size, self.image_size])
         mask = tf.resize(mask, [self.image_size, self.image_size])
+
        log(composite.shape, mask.shape)
        with torch.no_grad():
-            harmonized = self.model(composite, mask)['harmonized']
+            harmonized = self.model(composite, mask)  # ['harmonized']
 
        result = harmonized * mask + composite * (1-mask)
-
+
        return result
 
 
@@ -42,15 +47,49 @@ class Matting:
    def __init__(self, **kwargs):
        self.rank = 0
        self.__dict__.update(kwargs)
-
-
-
-
-
+        if self.onnx:
+            self.model = onnx.load(self.checkpoint.matting_onnx)
+        else:
+            self.model = StyleMatte().to(self.device)
+            state = torch.load(self.checkpoint.matting,
+                               map_location=self.device)
+            self.model.load_state_dict(state, strict=True)
+            self.model.eval()
 
    def extract(self, inp):
-        mask = inference_img(self.model, inp, self.device)
+        mask = inference_img(self.model, inp, self.device, self.onnx)
        inp_np = np.array(inp)
        fg = mask[:, :, None]*inp_np
 
        return [mask, fg]
+
+
+def inference_img(model, img, device='cpu', onnx=True):
+    beg = time.time()
+    h, w, _ = img.shape
+    # print(img.shape)
+    if h % 8 != 0 or w % 8 != 0:
+        img = cv2.copyMakeBorder(img, 8-h % 8, 0, 8-w %
+                                 8, 0, cv2.BORDER_REFLECT)
+    # print(img.shape)
+
+    tensor_img = torch.from_numpy(img).permute(2, 0, 1).to(device)
+    input_t = tensor_img/255.0
+    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
+                                     std=[0.229, 0.224, 0.225])
+    input_t = normalize(input_t)
+    input_t = input_t.unsqueeze(0).float()
+    end_p = time.time()
+
+    if onnx:
+        out = execute_onnx_model(input_t, model)
+    else:
+        with torch.no_grad():
+            out = model(input_t).cpu().numpy()
+    end = time.time()
+    log(f"Inference time: {end-beg}, processing time: {end_p-beg}")
+    # print("out",out.shape)
+    result = out[0][:, -h:, -w:]
+    # print(result.shape)
+
+    return result[0]
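
One subtle point in the new inference_img: the reflect padding is applied only to the top and left edges, so the crop out[0][:, -h:, -w:] removes exactly that padding. A small sketch of the shape arithmetic (illustration only):

import numpy as np
import cv2

h, w = 719, 1281                      # arbitrary sizes not divisible by 8
img = np.zeros((h, w, 3), np.uint8)
pad_top, pad_left = 8 - h % 8, 8 - w % 8
padded = cv2.copyMakeBorder(img, pad_top, 0, pad_left, 0, cv2.BORDER_REFLECT)
print(padded.shape)                   # (720, 1288, 3) -- multiples of 8
print(padded[-h:, -w:].shape)         # (719, 1281, 3) -- original size restored
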
tools/model.py
CHANGED
@@ -133,6 +133,16 @@ class Inference_Data(Dataset):
         return self.data_len
 
 
+class MyAdaptiveMaxPool2d(nn.Module):
+    def __init__(self, sz=None):
+        super().__init__()
+
+    def forward(self, x):
+        inp_size = x.size()
+        return nn.functional.max_pool2d(input=x,
+                                        kernel_size=(inp_size[2], inp_size[3]))
+
+
 class SEBlock(nn.Module):
     def __init__(self, channel, reducation=8):
         super(SEBlock, self).__init__()
@@ -152,7 +162,8 @@ class SEBlock(nn.Module):
         y = self.fc(y1).view(b, c, 1, 1)
         r = x*y
         if aux_inp is not None:
-            aux_weitghts =
+            aux_weitghts = MyAdaptiveMaxPool2d(
+                aux_inp.shape[-1]//8)(aux_inp)
             aux_weitghts = nn.Sigmoid()(aux_weitghts.mean(1, keepdim=True))
             tmp = x*aux_weitghts
             tmp_img = (tmp - tmp.min()) / (tmp.max() - tmp.min())
@@ -283,11 +294,9 @@ class PHNet(nn.Module):
             x = self.skip[i](x)
             x = up_layer(x)
 
-
-        return {
-            "harmonized": relighted,  # target prediction
-        }
+        harmonized = F.sigmoid(x)
 
+        return harmonized
 
     def set_requires_grad(self, modules=["encoder", "sh_head", "resquare", "decoder"], value=False):
        for module in modules:
tools/stylematte.py
CHANGED
@@ -284,10 +284,6 @@ class CenterBlock(nn.Sequential):
 class SegForm(nn.Module):
     def __init__(self):
         super(SegForm, self).__init__()
-        # configuration = SegformerConfig.from_pretrained("nvidia/segformer-b0-finetuned-ade-512-512")
-        # configuration.num_labels = 1 ## set output as 1
-        # self.model = SegformerForSemanticSegmentation(config=configuration)
-
         self.model = SegformerForSemanticSegmentation.from_pretrained("nvidia/mit-b0", num_labels=1, ignore_mismatched_sizes=True
                                                                        )
 
@@ -303,22 +299,13 @@
 class StyleMatte(nn.Module):
     def __init__(self):
         super(StyleMatte, self).__init__()
-        # configuration = SegformerConfig.from_pretrained("nvidia/segformer-b0-finetuned-ade-512-512")
-        # configuration.num_labels = 1 ## set output as 1
         self.fpn = FPN_fuse(feature_channels=[256, 256, 256, 256], fpn_out=256)
         self.pixel_decoder = Mask2FormerForUniversalSegmentation.from_pretrained(
             "facebook/mask2former-swin-tiny-coco-instance").base_model.pixel_level_module
         self.fgf = FastGuidedFilter()
         self.conv = nn.Conv2d(256, 1, kernel_size=3, padding=1)
-        # self.mean = torch.Tensor([0.43216, 0.394666, 0.37645]).float().view(-1, 1, 1)
-        # self.register_buffer('image_net_mean', self.mean)
-        # self.std = torch.Tensor([0.22803, 0.22145, 0.216989]).float().view(-1, 1, 1)
-        # self.register_buffer('image_net_std', self.std)
 
     def forward(self, image, normalize=False):
-        # if normalize:
-        #     image.sub_(self.get_buffer("image_net_mean")).div_(self.get_buffer("image_net_std"))
-
         decoder_out = self.pixel_decoder(image)
         decoder_states = list(decoder_out.decoder_hidden_states)
         decoder_states.append(decoder_out.decoder_last_hidden_state)
@@ -331,18 +318,11 @@
             )
         out = self.conv(out_pure)
         out = self.fgf(image_lr, out, image.mean(
-            1, keepdim=True))
-        # out = nn.Sigmoid()(out)
-        # out = nn.functional.interpolate(out,
-        #                                 scale_factor=4,
-        #                                 mode='bicubic',
-        #                                 align_corners=True
-        #                                 )
+            1, keepdim=True))
 
         return torch.sigmoid(out)
 
     def get_training_params(self):
-        # +list(self.fgf.parameters())
         return list(self.fpn.parameters())+list(self.conv.parameters())
 
 
tools/util.py
CHANGED
@@ -6,13 +6,10 @@ import torch.nn as nn
 from torchvision.utils import make_grid
 import cv2
 from torchvision import transforms, models
+from PIL import Image
+import torchvision.transforms.functional as tf
 
-
-def log(msg, lvl='info'):
-    if lvl == 'info':
-        print(f"***********{msg}****************")
-    if lvl == 'error':
-        print(f"!!! Exception: {msg} !!!")
+# --------------------------------------------Metric tools-------------------------------------------- #
 
 
 def lab_shift(x, invert=False):
@@ -321,6 +318,7 @@ def linear_rgb_to_rgb(image: torch.Tensor) -> torch.Tensor:
     return rgb
 
 
+# --------------------------------------------Inference tools-------------------------------------------- #
 def inference_img(model, img, device='cpu'):
     h, w, _ = img.shape
     # print(img.shape)
@@ -343,3 +341,48 @@ def inference_img(model, img, device='cpu'):
     # print(result.shape)
 
     return result[0]
+
+
+def log(msg, lvl='info'):
+    if lvl == 'info':
+        print(f"***********{msg}****************")
+    if lvl == 'error':
+        print(f"!!! Exception: {msg} !!!")
+
+
+def harmonize(comp, mask, model):
+    log("Inference started")
+    if comp is None or mask is None:
+        log("Empty source")
+        return np.zeros((16, 16, 3))
+
+    comp = comp.convert('RGB')
+    mask = mask.convert('1')
+    in_shape = comp.size[::-1]
+
+    comp = tf.resize(comp, [model.image_size, model.image_size])
+    mask = tf.resize(mask, [model.image_size, model.image_size])
+
+    compt = tf.to_tensor(comp)
+    maskt = tf.to_tensor(mask)
+    res = model.harmonize(compt, maskt)
+    res = tf.resize(res, in_shape)
+
+    log("Inference finished")
+
+    return np.uint8((res*255)[0].permute(1, 2, 0).numpy())
+
+
+def extract_matte(img, back, model):
+    mask, fg = model.extract(img)
+    fg_pil = Image.fromarray(np.uint8(fg))
+
+    composite = fg + (1 - mask[:, :, None]) * \
+        np.array(back.resize(mask.shape[::-1]))
+    composite_pil = Image.fromarray(np.uint8(composite))
+
+    return [composite_pil, mask, fg_pil]
+
+
+def css(height=3, scale=2):
+    return f".output_image {{height: {height}rem !important; width: {scale}rem !important;}}"