Spaces:

Podtekatel
/

Arcane_Style_Transfer

Runtime error

App Files Files Community

Podtekatel committed on Nov 7, 2022

Commit

046b3c9

1 Parent(s): 4425d8c

Initial commit for arcane

Browse files

Files changed (15) hide show

.gitattributes +0 -1
README.md +4 -4
app.py +66 -0
demo/IMG1.jpg +0 -0
demo/IMG2.png +0 -0
demo/IMG3.jpg +0 -0
hf_download.py +18 -0
inference/__init__.py +0 -0
inference/box_utils.py +31 -0
inference/center_crop.py +24 -0
inference/face_detector.py +121 -0
inference/model_pipeline.py +110 -0
inference/onnx_model.py +14 -0
packages.txt +1 -0
requirements.txt +5 -0

.gitattributes CHANGED Viewed

@@ -2,7 +2,6 @@
 *.arrow filter=lfs diff=lfs merge=lfs -text
 *.bin filter=lfs diff=lfs merge=lfs -text
 *.bz2 filter=lfs diff=lfs merge=lfs -text
-*.ckpt filter=lfs diff=lfs merge=lfs -text
 *.ftz filter=lfs diff=lfs merge=lfs -text
 *.gz filter=lfs diff=lfs merge=lfs -text
 *.h5 filter=lfs diff=lfs merge=lfs -text

 *.arrow filter=lfs diff=lfs merge=lfs -text
 *.bin filter=lfs diff=lfs merge=lfs -text
 *.bz2 filter=lfs diff=lfs merge=lfs -text
 *.ftz filter=lfs diff=lfs merge=lfs -text
 *.gz filter=lfs diff=lfs merge=lfs -text
 *.h5 filter=lfs diff=lfs merge=lfs -text

README.md CHANGED Viewed

@@ -1,12 +1,12 @@
 ---
 title: Arcane Style Transfer
-emoji: 💩
 colorFrom: blue
-colorTo: green
 sdk: gradio
-sdk_version: 3.9
 app_file: app.py
-pinned: false
 license: bsd-3-clause
 ---

 ---
 title: Arcane Style Transfer
+emoji: 👩🏻‍🔧💎😈
 colorFrom: blue
+colorTo: pink
 sdk: gradio
+sdk_version: 3.8.2
 app_file: app.py
+pinned: true
 license: bsd-3-clause
 ---

app.py ADDED Viewed

	@@ -0,0 +1,66 @@

+import logging
+import os
+import gradio as gr
+import numpy as np
+from PIL import Image
+from huggingface_hub import hf_hub_url, cached_download
+from inference.face_detector import StatRetinaFaceDetector
+from inference.model_pipeline import VSNetModelPipeline
+from inference.onnx_model import ONNXModel
+logging.basicConfig(
+    format='%(asctime)s %(levelname)-8s %(message)s',
+    level=logging.INFO,
+    datefmt='%Y-%m-%d %H:%M:%S')
+MODEL_IMG_SIZE = 256
+def load_model():
+    REPO_ID = "Podtekatel/ARCNEGAN"
+    FILENAME = "arcane_exp_203_ep_281.onnx"
+    global model
+    global pipeline
+    model_path = cached_download(
+        hf_hub_url(REPO_ID, FILENAME), use_auth_token=os.getenv('HF_TOKEN')
+    )
+    model = ONNXModel(model_path)
+    pipeline = VSNetModelPipeline(model, StatRetinaFaceDetector(MODEL_IMG_SIZE), background_resize=1024, no_detected_resize=1024)
+    return model
+load_model()
+def inference(img):
+    img = np.array(img)
+    out_img = pipeline(img)
+    out_img = Image.fromarray(out_img)
+    return out_img
+title = "JJStyleTransfer"
+description = "Gradio Demo for Arcane Season 1 style transfer. To use it, simply upload your image, or click one of the examples to load them."
+article = "This is one of my successful experiments on style transfer. I've built my own pipeline, generator model and private dataset to train this model<br>" \
+          "" \
+          "" \
+          "" \
+          "Model pipeline which used in project is improved CartoonGAN.<br>" \
+          "This model was trained on RTX 2080 Ti 1.5 days with batch size 7.<br>" \
+          "Model weights 64 MB in ONNX fp32 format, infers 25 ms on GPU and 150 ms on CPU at 256x256 resolution.<br>" \
+          "If you want to use this app or integrate this model into yours, please contact me at email '[email protected]'."
+imgs_folder = 'demo'
+examples = [[os.path.join(imgs_folder, img_filename)] for img_filename in sorted(os.listdir(imgs_folder))]
+demo = gr.Interface(
+    fn=inference,
+    inputs=[gr.inputs.Image(type="pil")],
+    outputs=gr.outputs.Image(type="pil"),
+    title=title,
+    description=description,
+    article=article,
+    examples=examples)
+demo.launch()

demo/IMG1.jpg ADDED Viewed

demo/IMG2.png ADDED Viewed

demo/IMG3.jpg ADDED Viewed

hf_download.py ADDED Viewed

	@@ -0,0 +1,18 @@

+import numpy as np
+from huggingface_hub import hf_hub_url, cached_download
+import joblib
+# Standalone check: download an ONNX file from the Hub and push one random input through it.
+# NOTE(review): this repo/file pair differs from the one app.py loads - confirm which is intended.
+REPO_ID = "MalchuL/JJBAGAN"
+FILENAME = "198_jjba_8_k_2_099_ep.onnx"
+model = cached_download(
+    hf_hub_url(REPO_ID, FILENAME)
+)
+# `model` holds whatever cached_download returned; it is passed to InferenceSession via str() below.
+print(model)
+import onnxruntime
+ort_session = onnxruntime.InferenceSession(str(model))
+# Only the first declared graph input is fed.
+input_name = ort_session.get_inputs()[0].name
+# Random float32 tensor of shape (1, 3, 256, 256).
+ort_inputs = {input_name: np.random.randn(1, 3, 256, 256).astype(dtype=np.float32)}
+ort_outs = ort_session.run(None, ort_inputs)
+print(ort_outs)

inference/__init__.py ADDED Viewed

File without changes

inference/box_utils.py ADDED Viewed

	@@ -0,0 +1,31 @@

+import numpy as np
+def convert_to_square(bboxes):
+    """Convert bounding boxes to a square form.
+    Arguments:
+        bboxes: a float numpy array of shape [n, 4].
+    Returns:
+        a float numpy array of shape [4],
+            squared bounding boxes.
+    """
+    square_bboxes = np.zeros_like(bboxes)
+    x1, y1, x2, y2 = bboxes
+    h = y2 - y1 + 1.0
+    w = x2 - x1 + 1.0
+    max_side = np.maximum(h, w)
+    square_bboxes[0] = x1 + w * 0.5 - max_side * 0.5
+    square_bboxes[1] = y1 + h * 0.5 - max_side * 0.5
+    square_bboxes[2] = square_bboxes[0] + max_side - 1.0
+    square_bboxes[3] = square_bboxes[1] + max_side - 1.0
+    return square_bboxes
+def scale_box(box, scale):
+    x1, y1, x2, y2 = box
+    center_x, center_y = (x1 + x2) / 2, (y1 + y2) / 2
+    w, h = x2 - x1, y2 - y1
+    new_w, new_h = w * scale, h * scale
+    y1, y2, x1, x2 = center_y - new_h / 2, center_y + new_h / 2, center_x - new_w / 2, center_x + new_w / 2,
+    return np.array((x1, y1, x2, y2))

inference/center_crop.py ADDED Viewed

	@@ -0,0 +1,24 @@

+import numpy as np
+# From albumentations
+def center_crop(img: np.ndarray, crop_height: int, crop_width: int):
+    height, width = img.shape[:2]
+    if height < crop_height or width < crop_width:
+        raise ValueError(
+            "Requested crop size ({crop_height}, {crop_width}) is "
+            "larger than the image size ({height}, {width})".format(
+                crop_height=crop_height, crop_width=crop_width, height=height, width=width
+            )
+        )
+    x1, y1, x2, y2 = get_center_crop_coords(height, width, crop_height, crop_width)
+    img = img[y1:y2, x1:x2]
+    return img
+def get_center_crop_coords(height: int, width: int, crop_height: int, crop_width: int):
+    y1 = (height - crop_height) // 2
+    y2 = y1 + crop_height
+    x1 = (width - crop_width) // 2
+    x2 = x1 + crop_width
+    return x1, y1, x2, y2

inference/face_detector.py ADDED Viewed

	@@ -0,0 +1,121 @@

+import os
+from abc import ABC, abstractmethod
+from typing import List
+import cv2
+import numpy as np
+from retinaface import RetinaFace
+from retinaface.model import retinaface_model
+from .box_utils import convert_to_square
+class FaceDetector(ABC):
+    def __init__(self, target_size):
+        self.target_size = target_size
+    @abstractmethod
+    def detect_crops(self, img, *args, **kwargs) -> List[np.ndarray]:
+        """
+        Img is a numpy ndarray in range [0..255], uint8 dtype, RGB type
+        Returns ndarray with [x1, y1, x2, y2] in row
+        """
+        pass
+    @abstractmethod
+    def postprocess_crops(self, crops, *args, **kwargs) -> List[np.ndarray]:
+        return crops
+    def sort_faces(self, crops):
+        sorted_faces = sorted(crops, key=lambda x: -(x[2] - x[0]) * (x[3] - x[1]))
+        sorted_faces = np.stack(sorted_faces, axis=0)
+        return sorted_faces
+    def fix_range_crops(self, img, crops):
+        H, W, _ = img.shape
+        final_crops = []
+        for crop in crops:
+            x1, y1, x2, y2 = crop
+            x1 = max(min(round(x1), W), 0)
+            y1 = max(min(round(y1), H), 0)
+            x2 = max(min(round(x2), W), 0)
+            y2 = max(min(round(y2), H), 0)
+            new_crop = [x1, y1, x2, y2]
+            final_crops.append(new_crop)
+        final_crops = np.array(final_crops, dtype=np.int)
+        return final_crops
+    def crop_faces(self, img, crops) -> List[np.ndarray]:
+        cropped_faces = []
+        for crop in crops:
+            x1, y1, x2, y2 = crop
+            face_crop = img[y1:y2, x1:x2, :]
+            cropped_faces.append(face_crop)
+        return cropped_faces
+    def unify_and_merge(self, cropped_images):
+        return cropped_images
+    def __call__(self, img):
+        return self.detect_faces(img)
+    def detect_faces(self, img):
+        crops = self.detect_crops(img)
+        if crops is None or len(crops) == 0:
+            return [], []
+        crops = self.sort_faces(crops)
+        updated_crops = self.postprocess_crops(crops)
+        updated_crops = self.fix_range_crops(img, updated_crops)
+        cropped_faces = self.crop_faces(img, updated_crops)
+        unified_faces = self.unify_and_merge(cropped_faces)
+        return unified_faces, updated_crops
+class StatRetinaFaceDetector(FaceDetector):
+    def __init__(self, target_size=None):
+        super().__init__(target_size)
+        self.model = retinaface_model.build_model()
+        #self.relative_offsets = [0.3258, 0.5225, 0.3258, 0.1290]
+        self.relative_offsets = [0.3619, 0.5830, 0.3619, 0.1909]
+    def postprocess_crops(self, crops, *args, **kwargs) -> np.ndarray:
+        final_crops = []
+        x1_offset, y1_offset, x2_offset, y2_offset = self.relative_offsets
+        for crop in crops:
+            x1, y1, x2, y2 = crop
+            w, h = x2 - x1, y2 - y1
+            x1 -= w * x1_offset
+            y1 -= h * y1_offset
+            x2 += w * x2_offset
+            y2 += h * y2_offset
+            crop = np.array([x1, y1, x2, y2], dtype=crop.dtype)
+            crop = convert_to_square(crop)
+            final_crops.append(crop)
+        final_crops = np.stack(final_crops, axis=0)
+        return final_crops
+    def detect_crops(self, img, *args, **kwargs):
+        faces = RetinaFace.detect_faces(img, model=self.model)
+        crops = []
+        if isinstance(faces, tuple):
+            faces = {}
+        for name, face in faces.items():
+            x1, y1, x2, y2 = face['facial_area']
+            crop = np.array([x1, y1, x2, y2])
+            crops.append(crop)
+        if len(crops) > 0:
+            crops = np.stack(crops, axis=0)
+        return crops
+    def unify_and_merge(self, cropped_images):
+        if self.target_size is None:
+            return cropped_images
+        else:
+            resized_images = []
+            for cropped_image in cropped_images:
+                resized_image = cv2.resize(cropped_image, (self.target_size, self.target_size),
+                                           interpolation=cv2.INTER_LINEAR)
+                resized_images.append(resized_image)
+            resized_images = np.stack(resized_images, axis=0)
+            return resized_images

inference/model_pipeline.py ADDED Viewed

	@@ -0,0 +1,110 @@

+import logging
+import time
+import cv2
+import numpy as np
+from .center_crop import center_crop
+from .face_detector import FaceDetector
+class VSNetModelPipeline:
+    def __init__(self, model, face_detector: FaceDetector, background_resize=720, no_detected_resize=256):
+        self.background_resize = background_resize
+        self.no_detected_resize = no_detected_resize
+        self.model = model
+        self.face_detector = face_detector
+        self.mask = self.create_circular_mask(face_detector.target_size, face_detector.target_size)
+    @staticmethod
+    def create_circular_mask(h, w, power=None, clipping_coef=0.85):
+        center = (int(w / 2), int(h / 2))
+        Y, X = np.ogrid[:h, :w]
+        dist_from_center = np.sqrt((X - center[0]) ** 2 + (Y - center[1]) ** 2)
+        print(dist_from_center.max(), dist_from_center.min())
+        clipping_radius = min((h - center[0]), (w - center[1])) * clipping_coef
+        max_size = max((h - center[0]), (w - center[1]))
+        dist_from_center[dist_from_center < clipping_radius] = clipping_radius
+        dist_from_center[dist_from_center > max_size] = max_size
+        max_distance, min_distance = np.max(dist_from_center), np.min(dist_from_center)
+        dist_from_center = 1 - (dist_from_center - min_distance) / (max_distance - min_distance)
+        if power is not None:
+            dist_from_center = np.power(dist_from_center, power)
+        dist_from_center = np.stack([dist_from_center] * 3, axis=2)
+        # mask = dist_from_center <= radius
+        return dist_from_center
+    @staticmethod
+    def resize_size(image, size=720, always_apply=True):
+        h, w, c = np.shape(image)
+        if min(h, w) > size or always_apply:
+            if h < w:
+                h, w = int(size * h / w), size
+            else:
+                h, w = size, int(size * w / h)
+        image = cv2.resize(image, (w, h), interpolation=cv2.INTER_AREA)
+        return image
+    def normalize(self, img):
+        img = img.astype(np.float32) / 255 * 2 - 1
+        return img
+    def denormalize(self, img):
+        return (img + 1) / 2
+    def divide_crop(self, img, must_divided=32):
+        h, w, _ = img.shape
+        h = h // must_divided * must_divided
+        w = w // must_divided * must_divided
+        img = center_crop(img, h, w)
+        return img
+    def merge_crops(self, faces_imgs, crops, full_image):
+        for face, crop in zip(faces_imgs, crops):
+            x1, y1, x2, y2 = crop
+            W, H = x2 - x1, y2 - y1
+            result_face = cv2.resize(face, (W, H), interpolation=cv2.INTER_LINEAR)
+            face_mask = cv2.resize(self.mask, (W, H), interpolation=cv2.INTER_LINEAR)
+            input_face = full_image[y1: y2, x1: x2]
+            full_image[y1: y2, x1: x2] = (result_face * face_mask + input_face * (1 - face_mask)).astype(np.uint8)
+        return full_image
+    def __call__(self, img):
+        return self.process_image(img)
+    def process_image(self, img):
+        img = self.resize_size(img, size=self.background_resize)
+        img = self.divide_crop(img)
+        face_crops, coords = self.face_detector(img)
+        if len(face_crops) > 0:
+            start_time = time.time()
+            faces = self.normalize(face_crops)
+            faces = faces.transpose(0, 3, 1, 2)
+            out_faces = self.model(faces)
+            out_faces = self.denormalize(out_faces)
+            out_faces = out_faces.transpose(0, 2, 3, 1)
+            out_faces = np.clip(out_faces * 255, 0, 255).astype(np.uint8)
+            end_time = time.time()
+            logging.info(f'Face FPS {1 / (end_time - start_time)}')
+        else:
+            out_faces = []
+            img = self.resize_size(img, size=self.no_detected_resize)
+            img = self.divide_crop(img)
+        start_time = time.time()
+        full_image = self.normalize(img)
+        full_image = np.expand_dims(full_image, 0).transpose(0, 3, 1, 2)
+        full_image = self.model(full_image)
+        full_image = self.denormalize(full_image)
+        full_image = full_image.transpose(0, 2, 3, 1)
+        full_image = np.clip(full_image * 255, 0, 255).astype(np.uint8)
+        end_time = time.time()
+        logging.info(f'Background FPS {1 / (end_time - start_time)}')
+        result_image = self.merge_crops(out_faces, coords, full_image[0])
+        return result_image

inference/onnx_model.py ADDED Viewed

	@@ -0,0 +1,14 @@

+import numpy as np
+import onnxruntime
+class ONNXModel:
+    def __init__(self, onnx_mode_path):
+        self.path = onnx_mode_path
+        self.ort_session = onnxruntime.InferenceSession(str(self.path))
+        self.input_name = self.ort_session.get_inputs()[0].name
+    def __call__(self, img):
+        ort_inputs = {self.input_name: img.astype(dtype=np.float32)}
+        ort_outs = self.ort_session.run(None, ort_inputs)[0]
+        return ort_outs

packages.txt ADDED Viewed

	@@ -0,0 +1 @@


+python3-opencv

requirements.txt ADDED Viewed

	@@ -0,0 +1,5 @@

+huggingface_hub
+onnxruntime
+numpy
+gradio
+retina-face