jayparmr committed on
Commit
19b3da3
1 Parent(s): 564eb4b

Upload 118 files

This view is limited to 50 of the 118 changed files because the commit contains too many changes; see the raw diff for the full list.
Files changed (50):
  1. README.md +31 -0
  2. config.yml +157 -0
  3. deployment.ipynb +995 -0
  4. handler.py +11 -0
  5. inference.py +341 -0
  6. inference2.py +169 -0
  7. internals/__init__.py +0 -0
  8. internals/data/__init__.py +0 -0
  9. internals/data/dataAccessor.py +104 -0
  10. internals/data/result.py +19 -0
  11. internals/data/task.py +125 -0
  12. internals/pipelines/commons.py +119 -0
  13. internals/pipelines/controlnets.py +221 -0
  14. internals/pipelines/img_classifier.py +24 -0
  15. internals/pipelines/img_to_text.py +31 -0
  16. internals/pipelines/inpainter.py +41 -0
  17. internals/pipelines/object_remove.py +82 -0
  18. internals/pipelines/prompt_modifier.py +54 -0
  19. internals/pipelines/remove_background.py +16 -0
  20. internals/pipelines/safety_checker.py +163 -0
  21. internals/pipelines/twoStepPipeline.py +252 -0
  22. internals/pipelines/upscaler.py +91 -0
  23. internals/util/__init__.py +0 -0
  24. internals/util/args.py +13 -0
  25. internals/util/avatar.py +59 -0
  26. internals/util/cache.py +31 -0
  27. internals/util/commons.py +203 -0
  28. internals/util/config.py +66 -0
  29. internals/util/failure_hander.py +40 -0
  30. internals/util/image.py +18 -0
  31. internals/util/lora_style.py +154 -0
  32. internals/util/slack.py +58 -0
  33. models/ade20k/.DS_Store +0 -0
  34. models/ade20k/__init__.py +1 -0
  35. models/ade20k/base.py +627 -0
  36. models/ade20k/color150.mat +0 -0
  37. models/ade20k/mobilenet.py +154 -0
  38. models/ade20k/object150_info.csv +151 -0
  39. models/ade20k/resnet.py +181 -0
  40. models/ade20k/segm_lib/.DS_Store +0 -0
  41. models/ade20k/segm_lib/nn/.DS_Store +0 -0
  42. models/ade20k/segm_lib/nn/__init__.py +2 -0
  43. models/ade20k/segm_lib/nn/modules/__init__.py +12 -0
  44. models/ade20k/segm_lib/nn/modules/batchnorm.py +329 -0
  45. models/ade20k/segm_lib/nn/modules/comm.py +131 -0
  46. models/ade20k/segm_lib/nn/modules/replicate.py +94 -0
  47. models/ade20k/segm_lib/nn/modules/tests/test_numeric_batchnorm.py +56 -0
  48. models/ade20k/segm_lib/nn/modules/tests/test_sync_batchnorm.py +111 -0
  49. models/ade20k/segm_lib/nn/modules/unittest.py +29 -0
  50. models/ade20k/segm_lib/nn/parallel/__init__.py +1 -0
README.md ADDED
@@ -0,0 +1,31 @@
+ # creco-inference
+ Unified inference code for SageMaker and Hugging Face endpoints.
+
+ ## Deployment
+
+ - Place this inference code inside the respective model folder, as shown below.
+
+ ### SageMaker
+
+ ```
+ model/
+     code/
+         (repo) <-- The repo inference code as direct child (no sub-folder)
+     vae
+     unet
+     ...
+ ```
+
+ - Refer to `deployment.ipynb` for creating the endpoint.
+
+ ### Hugging Face
+
+ ```
+ model/
+     (repo) <-- The repo inference code as direct child (no sub-folder)
+     vae
+     unet
+     ...
+ ```
+
+ - Refer to the [docs](https://huggingface.co/docs/inference-endpoints/guides/create_endpoint) to create the endpoint.
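As a companion to the README above, here is a minimal packaging-and-deploy sketch condensed from `deployment.ipynb`. It assumes a SageMaker notebook environment with an execution role; the folder name `model_v9` and the endpoint name are illustrative, and the VPC config used in the notebook is omitted.

```python
import os
import tarfile

import sagemaker
from sagemaker.huggingface.model import HuggingFaceModel
from sagemaker.s3 import S3Uploader

# Package the prepared model folder (code/, vae/, unet/, ...) into model.tar.gz.
model_dir = "model_v9"  # illustrative; any folder laid out as described above
with tarfile.open("model_v9.tar.gz", "w:gz") as tar:
    for item in os.listdir(model_dir):
        tar.add(os.path.join(model_dir, item), arcname=item)

# Upload the archive and create the endpoint (same versions/instance as deployment.ipynb).
s3_model_uri = S3Uploader.upload(
    local_path="model_v9.tar.gz",
    desired_s3_uri="s3://comic-assets/stable-diffusion-v1-4/v2/",
)
role = sagemaker.get_execution_role()
huggingface_model = HuggingFaceModel(
    model_data=s3_model_uri,
    role=role,
    transformers_version="4.17",
    pytorch_version="1.10",
    py_version="py38",
)
predictor = huggingface_model.deploy(
    initial_instance_count=1,
    instance_type="ml.g4dn.xlarge",
    endpoint_name="prod-10000-example",  # naming pattern assumed from deployment.ipynb
)
```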
config.yml ADDED
@@ -0,0 +1,157 @@
+ run_title: b18_ffc075_batch8x15
+ training_model:
+   kind: default
+   visualize_each_iters: 1000
+   concat_mask: true
+   store_discr_outputs_for_vis: true
+ losses:
+   l1:
+     weight_missing: 0
+     weight_known: 10
+   perceptual:
+     weight: 0
+   adversarial:
+     kind: r1
+     weight: 10
+     gp_coef: 0.001
+     mask_as_fake_target: true
+     allow_scale_mask: true
+   feature_matching:
+     weight: 100
+   resnet_pl:
+     weight: 30
+     weights_path: ${env:TORCH_HOME}
+
+ optimizers:
+   generator:
+     kind: adam
+     lr: 0.001
+   discriminator:
+     kind: adam
+     lr: 0.0001
+ visualizer:
+   key_order:
+   - image
+   - predicted_image
+   - discr_output_fake
+   - discr_output_real
+   - inpainted
+   rescale_keys:
+   - discr_output_fake
+   - discr_output_real
+   kind: directory
+   outdir: /group-volume/User-Driven-Content-Generation/r.suvorov/inpainting/experiments/r.suvorov_2021-04-30_14-41-12_train_simple_pix2pix2_gap_sdpl_novgg_large_b18_ffc075_batch8x15/samples
+ location:
+   data_root_dir: /group-volume/User-Driven-Content-Generation/datasets/inpainting_data_root_large
+   out_root_dir: /group-volume/User-Driven-Content-Generation/${env:USER}/inpainting/experiments
+   tb_dir: /group-volume/User-Driven-Content-Generation/${env:USER}/inpainting/tb_logs
+ data:
+   batch_size: 15
+   val_batch_size: 2
+   num_workers: 3
+   train:
+     indir: ${location.data_root_dir}/train
+     out_size: 256
+     mask_gen_kwargs:
+       irregular_proba: 1
+       irregular_kwargs:
+         max_angle: 4
+         max_len: 200
+         max_width: 100
+         max_times: 5
+         min_times: 1
+       box_proba: 1
+       box_kwargs:
+         margin: 10
+         bbox_min_size: 30
+         bbox_max_size: 150
+         max_times: 3
+         min_times: 1
+       segm_proba: 0
+       segm_kwargs:
+         confidence_threshold: 0.5
+         max_object_area: 0.5
+         min_mask_area: 0.07
+         downsample_levels: 6
+         num_variants_per_mask: 1
+         rigidness_mode: 1
+         max_foreground_coverage: 0.3
+         max_foreground_intersection: 0.7
+         max_mask_intersection: 0.1
+         max_hidden_area: 0.1
+         max_scale_change: 0.25
+         horizontal_flip: true
+         max_vertical_shift: 0.2
+         position_shuffle: true
+     transform_variant: distortions
+     dataloader_kwargs:
+       batch_size: ${data.batch_size}
+       shuffle: true
+       num_workers: ${data.num_workers}
+   val:
+     indir: ${location.data_root_dir}/val
+     img_suffix: .png
+     dataloader_kwargs:
+       batch_size: ${data.val_batch_size}
+       shuffle: false
+       num_workers: ${data.num_workers}
+   visual_test:
+     indir: ${location.data_root_dir}/korean_test
+     img_suffix: _input.png
+     pad_out_to_modulo: 32
+     dataloader_kwargs:
+       batch_size: 1
+       shuffle: false
+       num_workers: ${data.num_workers}
+ generator:
+   kind: ffc_resnet
+   input_nc: 4
+   output_nc: 3
+   ngf: 64
+   n_downsampling: 3
+   n_blocks: 18
+   add_out_act: sigmoid
+   init_conv_kwargs:
+     ratio_gin: 0
+     ratio_gout: 0
+     enable_lfu: false
+   downsample_conv_kwargs:
+     ratio_gin: ${generator.init_conv_kwargs.ratio_gout}
+     ratio_gout: ${generator.downsample_conv_kwargs.ratio_gin}
+     enable_lfu: false
+   resnet_conv_kwargs:
+     ratio_gin: 0.75
+     ratio_gout: ${generator.resnet_conv_kwargs.ratio_gin}
+     enable_lfu: false
+ discriminator:
+   kind: pix2pixhd_nlayer
+   input_nc: 3
+   ndf: 64
+   n_layers: 4
+ evaluator:
+   kind: default
+   inpainted_key: inpainted
+   integral_kind: ssim_fid100_f1
+ trainer:
+   kwargs:
+     gpus: -1
+     accelerator: ddp
+     max_epochs: 200
+     gradient_clip_val: 1
+     log_gpu_memory: None
+     limit_train_batches: 25000
+     val_check_interval: ${trainer.kwargs.limit_train_batches}
+     log_every_n_steps: 1000
+     precision: 32
+     terminate_on_nan: false
+     check_val_every_n_epoch: 1
+     num_sanity_val_steps: 8
+     limit_val_batches: 1000
+     replace_sampler_ddp: false
+   checkpoint_kwargs:
+     verbose: true
+     save_top_k: 5
+     save_last: true
+     period: 1
+     monitor: val_ssim_fid100_f1_total_mean
+     mode: max
deployment.ipynb ADDED
@@ -0,0 +1,995 @@
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 1,
6
+ "id": "5af7e53b-80ff-4058-888d-fe41804f64ba",
7
+ "metadata": {
8
+ "scrolled": true,
9
+ "tags": []
10
+ },
11
+ "outputs": [
12
+ {
13
+ "name": "stdout",
14
+ "output_type": "stream",
15
+ "text": [
16
+ "Looking in indexes: https://pypi.org/simple, https://pip.repos.neuron.amazonaws.com\n",
17
+ "Requirement already satisfied: pip in /home/ec2-user/anaconda3/envs/pytorch_p39/lib/python3.9/site-packages (23.1.2)\n"
18
+ ]
19
+ }
20
+ ],
21
+ "source": [
22
+ "!pip install --upgrade pip\n",
23
+ "!pip install \"sagemaker==2.116.0\" \"huggingface_hub==0.10.1\" --upgrade --quiet"
24
+ ]
25
+ },
26
+ {
27
+ "cell_type": "code",
28
+ "execution_count": 5,
29
+ "id": "93ee3d96-400f-46b4-8eb3-0f3f3c853a7e",
30
+ "metadata": {
31
+ "tags": []
32
+ },
33
+ "outputs": [],
34
+ "source": [
35
+ "from distutils.dir_util import copy_tree\n",
36
+ "from pathlib import Path\n",
37
+ "from huggingface_hub import snapshot_download\n",
38
+ "import random\n",
39
+ "import os\n",
40
+ "import tarfile\n",
41
+ "import time\n",
42
+ "import sagemaker\n",
43
+ "from datetime import datetime\n",
44
+ "from sagemaker.s3 import S3Uploader\n",
45
+ "import boto3\n",
46
+ "from sagemaker.huggingface.model import HuggingFaceModel\n",
47
+ "from threading import Thread\n",
48
+ "import subprocess\n",
49
+ "import shutil"
50
+ ]
51
+ },
52
+ {
53
+ "cell_type": "code",
54
+ "execution_count": 2,
55
+ "id": "2db37b03-b517-46bc-8602-4999a64399c0",
56
+ "metadata": {
57
+ "tags": []
58
+ },
59
+ "outputs": [],
60
+ "source": [
61
+ "# ------------------------------------------------\n",
62
+ "# Configuration\n",
63
+ "# ------------------------------------------------\n",
64
+ "STAGE = \"prod\"\n",
65
+ "model_configs = [\n",
66
+ " # {\n",
67
+ " # \"inference_2\": False, \n",
68
+ " # \"path\": \"icbinp\",\n",
69
+ " # \"endpoint_name\": \"gamma-10000-2023-05-16-14-55\"\n",
70
+ " # #\"endpoint_name\": f\"{STAGE}-10000-\" + datetime.now().strftime(\"%Y-%m-%d-%H-%M\")\n",
71
+ " # },\n",
72
+ " # {\n",
73
+ " # \"inference_2\": False, \n",
74
+ " # \"path\": \"icb_with_epi\",\n",
75
+ " # \"endpoint_name\": \"gamma-10000-2023-05-16-14-55\"\n",
76
+ " # # \"endpoint_name\": f\"{STAGE}-10000-\" + datetime.now().strftime(\"%Y-%m-%d-%H-%M\")\n",
77
+ " # },\n",
78
+ " {\n",
79
+ " \"inference_2\": False, \n",
80
+ " \"path\": \"model_v9\",\n",
81
+ " # \"endpoint_name\": \"gamma-10000-2023-05-16-14-55\"\n",
82
+ " \"endpoint_name\": f\"{STAGE}-10000-\" + datetime.now().strftime(\"%Y-%m-%d-%H-%M\")\n",
83
+ " },\n",
84
+ " {\n",
85
+ " \"inference_2\": False, \n",
86
+ " \"path\": \"model_v8\",\n",
87
+ " #\"endpoint_name\": \"gamma-10001-2023-05-08-06-14\"\n",
88
+ " \"endpoint_name\": f\"{STAGE}-10001-\" + datetime.now().strftime(\"%Y-%m-%d-%H-%M\")\n",
89
+ " },\n",
90
+ " # {\n",
91
+ " # \"inference_2\": False, \n",
92
+ " # \"path\": \"model_v5_anime\",\n",
93
+ " # \"endpoint_name\": \"gamma-10001-2023-05-08-06-14\"\n",
94
+ " # #\"endpoint_name\": f\"{STAGE}-10001-\" + datetime.now().strftime(\"%Y-%m-%d-%H-%M\")\n",
95
+ " # },\n",
96
+ " # {\n",
97
+ " # \"inference_2\": False, \n",
98
+ " # \"path\": \"model_v5.3_comic\",\n",
99
+ " # #\"endpoint_name\": \"gamma-10002-2023-05-08-07-22\"\n",
100
+ " # \"endpoint_name\": f\"{STAGE}-10002-\" + datetime.now().strftime(\"%Y-%m-%d-%H-%M\")\n",
101
+ " # },\n",
102
+ " {\n",
103
+ " \"inference_2\": False, \n",
104
+ " \"path\": \"model_v10\",\n",
105
+ " # \"endpoint_name\": \"gamma-10002-2023-05-08-07-22\"\n",
106
+ " \"endpoint_name\": f\"{STAGE}-10002-\" + datetime.now().strftime(\"%Y-%m-%d-%H-%M\")\n",
107
+ " },\n",
108
+ " {\n",
109
+ " \"inference_2\": True, \n",
110
+ " \"path\": \"model_v5.2_other\",\n",
111
+ " # \"endpoint_name\": \"gamma-other-2023-05-04-09-33\"\n",
112
+ " \"endpoint_name\": f\"{STAGE}-other-\" + datetime.now().strftime(\"%Y-%m-%d-%H-%M\")\n",
113
+ " }\n",
114
+ " # {\n",
115
+ " # \"inference_2\": False, \n",
116
+ " # \"path\": \"model_v6_bheem\",\n",
117
+ " # \"endpoint_name\": f\"{STAGE}-10003-\" + datetime.now().strftime(\"%Y-%m-%d-%H-%M\")\n",
118
+ " # },\n",
119
+ " # {\n",
120
+ " # \"inference_2\": False, \n",
121
+ " # \"path\": \"model_v12\",\n",
122
+ " # \"endpoint_name\": \"gamma-10003-2023-05-04-05-20\"\n",
123
+ " # # \"endpoint_name\": f\"{STAGE}-10003-\" + datetime.now().strftime(\"%Y-%m-%d-%H-%M\")\n",
124
+ " # }\n",
125
+ "]\n",
126
+ "\n",
127
+ "VpcConfig = {\n",
128
+ " \"Subnets\": [\n",
129
+ " \"subnet-0df3f71df4c7b29e5\",\n",
130
+ " \"subnet-0d753b7fc74b5ee68\"\n",
131
+ " ],\n",
132
+ " \"SecurityGroupIds\": [\n",
133
+ " \"sg-033a7948e79a501cd\"\n",
134
+ " ]\n",
135
+ "}"
136
+ ]
137
+ },
138
+ {
139
+ "cell_type": "code",
140
+ "execution_count": 3,
141
+ "id": "d7322ac4-aeeb-4a72-a662-5f3fa74e6454",
142
+ "metadata": {
143
+ "tags": []
144
+ },
145
+ "outputs": [],
146
+ "source": [
147
+ "def compress(tar_dir=None,output_file=\"model.tar.gz\"):\n",
148
+ " parent_dir=os.getcwd()\n",
149
+ " os.chdir(parent_dir + \"/\" + tar_dir)\n",
150
+ " with tarfile.open(os.path.join(parent_dir, output_file), \"w:gz\") as tar:\n",
151
+ " for item in os.listdir('.'):\n",
152
+ " print(\"- \" + item)\n",
153
+ " tar.add(item, arcname=item)\n",
154
+ " os.chdir(parent_dir)\n",
155
+ "\n",
156
+ " \n",
157
+ "def create_model_tar(config):\n",
158
+ " print(\"Copying inference 'code': \" + config.get(\"path\"))\n",
159
+ " \n",
160
+ " model_tar = Path(config.get(\"path\"))\n",
161
+ " if os.path.exists(model_tar.joinpath(\"code\")):\n",
162
+ " shutil.rmtree(model_tar.joinpath(\"code\"))\n",
163
+ " out_tar = config.get(\"path\") + \".tar.gz\"\n",
164
+ " model_tar.mkdir(exist_ok=True)\n",
165
+ " copy_tree(\"code/\", str(model_tar.joinpath(\"code\")))\n",
166
+ " copy_tree(\"laur_style/\", str(model_tar.joinpath(\"laur_style\")))\n",
167
+ " \n",
168
+ " if config.get(\"inference_2\"):\n",
169
+ " os.remove(model_tar.joinpath(\"code\").joinpath(\"inference.py\"))\n",
170
+ " os.rename(model_tar.joinpath(\"code\").joinpath(\"inference2.py\"), model_tar.joinpath(\"code\").joinpath(\"inference.py\"))\n",
171
+ " \n",
172
+ " print(\"Compressing: \" + config.get(\"path\"))\n",
173
+ "\n",
174
+ " if os.path.exists(out_tar):\n",
175
+ " os.remove(out_tar)\n",
176
+ "\n",
177
+ " compress(str(model_tar), out_tar)\n",
178
+ " \n",
179
+ "def upload_to_s3(config):\n",
180
+ " out_tar = config.get(\"path\") + \".tar.gz\"\n",
181
+ " print(\"Uploading model to S3: \" + out_tar)\n",
182
+ " s3_model_uri=S3Uploader.upload(local_path=out_tar, desired_s3_uri=f\"s3://comic-assets/stable-diffusion-v1-4/v2/\")\n",
183
+ " return s3_model_uri\n",
184
+ " \n",
185
+ " \n",
186
+ "def deploy_and_create_endpoint(config, s3_model_uri):\n",
187
+ " sess = sagemaker.Session()\n",
188
+ " # sagemaker session bucket -> used for uploading data, models and logs\n",
189
+ " # sagemaker will automatically create this bucket if it not exists\n",
190
+ " sagemaker_session_bucket=None\n",
191
+ " if sagemaker_session_bucket is None and sess is not None:\n",
192
+ " # set to default bucket if a bucket name is not given\n",
193
+ " sagemaker_session_bucket = sess.default_bucket()\n",
194
+ " try:\n",
195
+ " role = sagemaker.get_execution_role()\n",
196
+ " except ValueError:\n",
197
+ " iam = boto3.client('iam')\n",
198
+ " role = iam.get_role(RoleName='sagemaker_execution_role')['Role']['Arn']\n",
199
+ "\n",
200
+ " sess = sagemaker.Session(default_bucket=sagemaker_session_bucket)\n",
201
+ " \n",
202
+ " huggingface_model = HuggingFaceModel(\n",
203
+ " model_data=s3_model_uri, # path to your model and script\n",
204
+ " role=role, # iam role with permissions to create an Endpoint\n",
205
+ " transformers_version=\"4.17\", # transformers version used\n",
206
+ " pytorch_version=\"1.10\", # pytorch version used\n",
207
+ " py_version='py38',# python version used\n",
208
+ " vpc_config=VpcConfig,\n",
209
+ " )\n",
210
+ "\n",
211
+ " print(\"Creating endpoint: \" + config.get(\"endpoint_name\"))\n",
212
+ "\n",
213
+ " predictor = huggingface_model.deploy(\n",
214
+ " initial_instance_count=1,\n",
215
+ " instance_type=\"ml.g4dn.xlarge\",\n",
216
+ " endpoint_name=config.get(\"endpoint_name\")\n",
217
+ " )\n",
218
+ "\n",
219
+ " \n",
220
+ "def start_process(config):\n",
221
+ " try:\n",
222
+ " create_model_tar(config)\n",
223
+ " s3_model_uri = upload_to_s3(config)\n",
224
+ " #s3_model_uri = \"s3://comic-assets/stable-diffusion-v1-4/v2//model_v5.2_other.tar.gz\"\n",
225
+ " deploy_and_create_endpoint(config, s3_model_uri)\n",
226
+ " except Exception as e:\n",
227
+ " print(\"Failed to deploy: \" + config.get(\"path\") + \"\\n\" + str(e))"
228
+ ]
229
+ },
230
+ {
231
+ "cell_type": "code",
232
+ "execution_count": 4,
233
+ "id": "cdc04669-90a5-4b43-8499-ad1d2dd63a4c",
234
+ "metadata": {
235
+ "tags": []
236
+ },
237
+ "outputs": [
238
+ {
239
+ "name": "stdout",
240
+ "output_type": "stream",
241
+ "text": [
242
+ "Copying inference 'code': model_v9\n",
243
+ "Compressing: model_v9\n",
244
+ "- scheduler\n",
245
+ "- vae\n",
246
+ "- .ipynb_checkpoints\n",
247
+ "- feature_extractor\n",
248
+ "- tokenizer\n",
249
+ "- text_encoder\n",
250
+ "- model_index.json\n",
251
+ "- laur_style\n",
252
+ "- code\n",
253
+ "- unet\n",
254
+ "- args.json\n",
255
+ "Uploading model to S3: model_v9.tar.gz\n",
256
+ "Creating endpoint: gamma-10000-2023-05-16-14-55\n",
257
+ "-----------------!\n",
258
+ "\n",
259
+ "Completed in : 992.3517553806305s\n"
260
+ ]
261
+ }
262
+ ],
263
+ "source": [
264
+ "threads = []\n",
265
+ "\n",
266
+ "os.chdir(\"/home/ec2-user/SageMaker\")\n",
267
+ "\n",
268
+ "start_time = time.time()\n",
269
+ "\n",
270
+ "for config in model_configs:\n",
271
+ " thread = Thread(target=start_process, args=(config,))\n",
272
+ " thread.start()\n",
273
+ " thread.join()\n",
274
+ " threads.append(thread)\n",
275
+ "\n",
276
+ "for thread in threads:\n",
277
+ " thread.join()\n",
278
+ " \n",
279
+ "print(\"\\n\\nCompleted in : \" + str(time.time() - start_time) + \"s\")\n",
280
+ "\n",
281
+ "# For redeploying gamma endpoints or promoting gamma endpoints to prod\n",
282
+ "\n",
283
+ "# thread1 = Thread(target=deploy_and_create_endpoint, args=(model_configs[0],\"s3://comic-assets/stable-diffusion-v1-4/v2//model_v9.tar.gz\",))\n",
284
+ "# thread2 = Thread(target=deploy_and_create_endpoint, args=(model_configs[1],\"s3://comic-assets/stable-diffusion-v1-4/v2//anime_mode_with_lora.tar.gz\",))\n",
285
+ "# thread3 = Thread(target=deploy_and_create_endpoint, args=(model_configs[0],\"s3://comic-assets/stable-diffusion-v1-4/v2//model_v5.3_comic.tar.gz\",))\n",
286
+ "# thread4 = Thread(target=deploy_and_create_endpoint, args=(model_configs[3],\"s3://comic-assets/stable-diffusion-v1-4/v2//model_v5.2_other.tar.gz\",))\n",
287
+ "\n",
288
+ "# thread1.start()\n",
289
+ "# thread2.start()\n",
290
+ "# thread3.start()\n",
291
+ "# thread4.start()\n",
292
+ "\n",
293
+ "# thread1.join()\n",
294
+ "# thread2.join()\n",
295
+ "# thread3.join()\n",
296
+ "# thread4.join()\n",
297
+ "\n",
298
+ "# print(\"Done\")\n"
299
+ ]
300
+ },
301
+ {
302
+ "cell_type": "code",
303
+ "execution_count": null,
304
+ "id": "39f007f2-0ff8-487c-b5d7-158f0947b7fd",
305
+ "metadata": {
306
+ "collapsed": true,
307
+ "jupyter": {
308
+ "outputs_hidden": true
309
+ },
310
+ "tags": []
311
+ },
312
+ "outputs": [],
313
+ "source": [
314
+ "\n",
315
+ "# import sagemaker\n",
316
+ "# import boto3\n",
317
+ "# import time \n",
318
+ "\n",
319
+ "# start = time.time()\n",
320
+ "\n",
321
+ "# sess = sagemaker.Session()\n",
322
+ "# # sagemaker session bucket -> used for uploading data, models and logs\n",
323
+ "# # sagemaker will automatically create this bucket if it not exists\n",
324
+ "# sagemaker_session_bucket=None\n",
325
+ "# if sagemaker_session_bucket is None and sess is not None:\n",
326
+ "# # set to default bucket if a bucket name is not given\n",
327
+ "# sagemaker_session_bucket = sess.default_bucket()\n",
328
+ "\n",
329
+ "# try:\n",
330
+ "# role = sagemaker.get_execution_role()\n",
331
+ "# except ValueError:\n",
332
+ "# iam = boto3.client('iam')\n",
333
+ "# role = iam.get_role(RoleName='sagemaker_execution_role')['Role']['Arn']\n",
334
+ "\n",
335
+ "# sess = sagemaker.Session(default_bucket=sagemaker_session_bucket)\n",
336
+ "\n",
337
+ "# print(f\"sagemaker role arn: {role}\")\n",
338
+ "# print(f\"sagemaker bucket: {sess.default_bucket()}\")\n",
339
+ "# print(f\"sagemaker session region: {sess.boto_region_name}\")\n",
340
+ "# print(sagemaker.get_execution_role())\n",
341
+ "\n",
342
+ "# from sagemaker.s3 import S3Uploader\n",
343
+ "\n",
344
+ "# print(\"Uploading model to S3\")\n",
345
+ "\n",
346
+ "# # upload model.tar.gz to s3\n",
347
+ "# s3_model_uri=S3Uploader.upload(local_path=\"model.tar.gz\", desired_s3_uri=f\"s3://comic-assets/stable-diffusion-v1-4/v2/\")\n",
348
+ "\n",
349
+ "# print(f\"model uploaded to: {s3_model_uri}\")\n",
350
+ "\n",
351
+ "\n",
352
+ "# from sagemaker.huggingface.model import HuggingFaceModel\n",
353
+ "\n",
354
+ "# VpcConfig = {\n",
355
+ "# \"Subnets\": [\n",
356
+ "# \"subnet-0df3f71df4c7b29e5\",\n",
357
+ "# \"subnet-0d753b7fc74b5ee68\"\n",
358
+ "# ],\n",
359
+ "# \"SecurityGroupIds\": [\n",
360
+ "# \"sg-033a7948e79a501cd\"\n",
361
+ "# ]\n",
362
+ "# }\n",
363
+ "\n",
364
+ "# # create Hugging Face Model Class\n",
365
+ "# huggingface_model = HuggingFaceModel(\n",
366
+ "# model_data=s3_model_uri, # path to your model and script\n",
367
+ "# role=role, # iam role with permissions to create an Endpoint\n",
368
+ "# transformers_version=\"4.17\", # transformers version used\n",
369
+ "# pytorch_version=\"1.10\", # pytorch version used\n",
370
+ "# py_version='py38',# python version used\n",
371
+ "# vpc_config=VpcConfig,\n",
372
+ "# )\n",
373
+ "\n",
374
+ "# print(\"Deploying model\")\n",
375
+ "\n",
376
+ "# predictor = huggingface_model.deploy(\n",
377
+ "# initial_instance_count=1,\n",
378
+ "# instance_type=\"ml.g4dn.xlarge\",\n",
379
+ "# # endpoint_name=endpoint_name\n",
380
+ "# )\n",
381
+ "\n",
382
+ "# print(f\"Done {time.time() - start}\")"
383
+ ]
384
+ },
385
+ {
386
+ "cell_type": "code",
387
+ "execution_count": null,
388
+ "id": "aa95a262-d6ba-4e61-8657-6f8e5bab74a1",
389
+ "metadata": {
390
+ "tags": []
391
+ },
392
+ "outputs": [],
393
+ "source": [
394
+ "!curl -s https://packagecloud.io/install/repositories/github/git-lfs/script.rpm.sh | sudo bash"
395
+ ]
396
+ },
397
+ {
398
+ "cell_type": "code",
399
+ "execution_count": null,
400
+ "id": "524ca546-2a67-4b51-9cda-a1b51a49c339",
401
+ "metadata": {
402
+ "tags": []
403
+ },
404
+ "outputs": [],
405
+ "source": [
406
+ "!sudo yum install git-lfs"
407
+ ]
408
+ },
409
+ {
410
+ "cell_type": "code",
411
+ "execution_count": null,
412
+ "id": "3c7e661f-5eee-4357-80f6-e7563941a812",
413
+ "metadata": {},
414
+ "outputs": [],
415
+ "source": []
416
+ }
417
+ ],
418
+ "metadata": {
419
+ "availableInstances": [
420
+ {
421
+ "_defaultOrder": 0,
422
+ "_isFastLaunch": true,
423
+ "category": "General purpose",
424
+ "gpuNum": 0,
425
+ "hideHardwareSpecs": false,
426
+ "memoryGiB": 4,
427
+ "name": "ml.t3.medium",
428
+ "vcpuNum": 2
429
+ },
430
+ {
431
+ "_defaultOrder": 1,
432
+ "_isFastLaunch": false,
433
+ "category": "General purpose",
434
+ "gpuNum": 0,
435
+ "hideHardwareSpecs": false,
436
+ "memoryGiB": 8,
437
+ "name": "ml.t3.large",
438
+ "vcpuNum": 2
439
+ },
440
+ {
441
+ "_defaultOrder": 2,
442
+ "_isFastLaunch": false,
443
+ "category": "General purpose",
444
+ "gpuNum": 0,
445
+ "hideHardwareSpecs": false,
446
+ "memoryGiB": 16,
447
+ "name": "ml.t3.xlarge",
448
+ "vcpuNum": 4
449
+ },
450
+ {
451
+ "_defaultOrder": 3,
452
+ "_isFastLaunch": false,
453
+ "category": "General purpose",
454
+ "gpuNum": 0,
455
+ "hideHardwareSpecs": false,
456
+ "memoryGiB": 32,
457
+ "name": "ml.t3.2xlarge",
458
+ "vcpuNum": 8
459
+ },
460
+ {
461
+ "_defaultOrder": 4,
462
+ "_isFastLaunch": true,
463
+ "category": "General purpose",
464
+ "gpuNum": 0,
465
+ "hideHardwareSpecs": false,
466
+ "memoryGiB": 8,
467
+ "name": "ml.m5.large",
468
+ "vcpuNum": 2
469
+ },
470
+ {
471
+ "_defaultOrder": 5,
472
+ "_isFastLaunch": false,
473
+ "category": "General purpose",
474
+ "gpuNum": 0,
475
+ "hideHardwareSpecs": false,
476
+ "memoryGiB": 16,
477
+ "name": "ml.m5.xlarge",
478
+ "vcpuNum": 4
479
+ },
480
+ {
481
+ "_defaultOrder": 6,
482
+ "_isFastLaunch": false,
483
+ "category": "General purpose",
484
+ "gpuNum": 0,
485
+ "hideHardwareSpecs": false,
486
+ "memoryGiB": 32,
487
+ "name": "ml.m5.2xlarge",
488
+ "vcpuNum": 8
489
+ },
490
+ {
491
+ "_defaultOrder": 7,
492
+ "_isFastLaunch": false,
493
+ "category": "General purpose",
494
+ "gpuNum": 0,
495
+ "hideHardwareSpecs": false,
496
+ "memoryGiB": 64,
497
+ "name": "ml.m5.4xlarge",
498
+ "vcpuNum": 16
499
+ },
500
+ {
501
+ "_defaultOrder": 8,
502
+ "_isFastLaunch": false,
503
+ "category": "General purpose",
504
+ "gpuNum": 0,
505
+ "hideHardwareSpecs": false,
506
+ "memoryGiB": 128,
507
+ "name": "ml.m5.8xlarge",
508
+ "vcpuNum": 32
509
+ },
510
+ {
511
+ "_defaultOrder": 9,
512
+ "_isFastLaunch": false,
513
+ "category": "General purpose",
514
+ "gpuNum": 0,
515
+ "hideHardwareSpecs": false,
516
+ "memoryGiB": 192,
517
+ "name": "ml.m5.12xlarge",
518
+ "vcpuNum": 48
519
+ },
520
+ {
521
+ "_defaultOrder": 10,
522
+ "_isFastLaunch": false,
523
+ "category": "General purpose",
524
+ "gpuNum": 0,
525
+ "hideHardwareSpecs": false,
526
+ "memoryGiB": 256,
527
+ "name": "ml.m5.16xlarge",
528
+ "vcpuNum": 64
529
+ },
530
+ {
531
+ "_defaultOrder": 11,
532
+ "_isFastLaunch": false,
533
+ "category": "General purpose",
534
+ "gpuNum": 0,
535
+ "hideHardwareSpecs": false,
536
+ "memoryGiB": 384,
537
+ "name": "ml.m5.24xlarge",
538
+ "vcpuNum": 96
539
+ },
540
+ {
541
+ "_defaultOrder": 12,
542
+ "_isFastLaunch": false,
543
+ "category": "General purpose",
544
+ "gpuNum": 0,
545
+ "hideHardwareSpecs": false,
546
+ "memoryGiB": 8,
547
+ "name": "ml.m5d.large",
548
+ "vcpuNum": 2
549
+ },
550
+ {
551
+ "_defaultOrder": 13,
552
+ "_isFastLaunch": false,
553
+ "category": "General purpose",
554
+ "gpuNum": 0,
555
+ "hideHardwareSpecs": false,
556
+ "memoryGiB": 16,
557
+ "name": "ml.m5d.xlarge",
558
+ "vcpuNum": 4
559
+ },
560
+ {
561
+ "_defaultOrder": 14,
562
+ "_isFastLaunch": false,
563
+ "category": "General purpose",
564
+ "gpuNum": 0,
565
+ "hideHardwareSpecs": false,
566
+ "memoryGiB": 32,
567
+ "name": "ml.m5d.2xlarge",
568
+ "vcpuNum": 8
569
+ },
570
+ {
571
+ "_defaultOrder": 15,
572
+ "_isFastLaunch": false,
573
+ "category": "General purpose",
574
+ "gpuNum": 0,
575
+ "hideHardwareSpecs": false,
576
+ "memoryGiB": 64,
577
+ "name": "ml.m5d.4xlarge",
578
+ "vcpuNum": 16
579
+ },
580
+ {
581
+ "_defaultOrder": 16,
582
+ "_isFastLaunch": false,
583
+ "category": "General purpose",
584
+ "gpuNum": 0,
585
+ "hideHardwareSpecs": false,
586
+ "memoryGiB": 128,
587
+ "name": "ml.m5d.8xlarge",
588
+ "vcpuNum": 32
589
+ },
590
+ {
591
+ "_defaultOrder": 17,
592
+ "_isFastLaunch": false,
593
+ "category": "General purpose",
594
+ "gpuNum": 0,
595
+ "hideHardwareSpecs": false,
596
+ "memoryGiB": 192,
597
+ "name": "ml.m5d.12xlarge",
598
+ "vcpuNum": 48
599
+ },
600
+ {
601
+ "_defaultOrder": 18,
602
+ "_isFastLaunch": false,
603
+ "category": "General purpose",
604
+ "gpuNum": 0,
605
+ "hideHardwareSpecs": false,
606
+ "memoryGiB": 256,
607
+ "name": "ml.m5d.16xlarge",
608
+ "vcpuNum": 64
609
+ },
610
+ {
611
+ "_defaultOrder": 19,
612
+ "_isFastLaunch": false,
613
+ "category": "General purpose",
614
+ "gpuNum": 0,
615
+ "hideHardwareSpecs": false,
616
+ "memoryGiB": 384,
617
+ "name": "ml.m5d.24xlarge",
618
+ "vcpuNum": 96
619
+ },
620
+ {
621
+ "_defaultOrder": 20,
622
+ "_isFastLaunch": false,
623
+ "category": "General purpose",
624
+ "gpuNum": 0,
625
+ "hideHardwareSpecs": true,
626
+ "memoryGiB": 0,
627
+ "name": "ml.geospatial.interactive",
628
+ "supportedImageNames": [
629
+ "sagemaker-geospatial-v1-0"
630
+ ],
631
+ "vcpuNum": 0
632
+ },
633
+ {
634
+ "_defaultOrder": 21,
635
+ "_isFastLaunch": true,
636
+ "category": "Compute optimized",
637
+ "gpuNum": 0,
638
+ "hideHardwareSpecs": false,
639
+ "memoryGiB": 4,
640
+ "name": "ml.c5.large",
641
+ "vcpuNum": 2
642
+ },
643
+ {
644
+ "_defaultOrder": 22,
645
+ "_isFastLaunch": false,
646
+ "category": "Compute optimized",
647
+ "gpuNum": 0,
648
+ "hideHardwareSpecs": false,
649
+ "memoryGiB": 8,
650
+ "name": "ml.c5.xlarge",
651
+ "vcpuNum": 4
652
+ },
653
+ {
654
+ "_defaultOrder": 23,
655
+ "_isFastLaunch": false,
656
+ "category": "Compute optimized",
657
+ "gpuNum": 0,
658
+ "hideHardwareSpecs": false,
659
+ "memoryGiB": 16,
660
+ "name": "ml.c5.2xlarge",
661
+ "vcpuNum": 8
662
+ },
663
+ {
664
+ "_defaultOrder": 24,
665
+ "_isFastLaunch": false,
666
+ "category": "Compute optimized",
667
+ "gpuNum": 0,
668
+ "hideHardwareSpecs": false,
669
+ "memoryGiB": 32,
670
+ "name": "ml.c5.4xlarge",
671
+ "vcpuNum": 16
672
+ },
673
+ {
674
+ "_defaultOrder": 25,
675
+ "_isFastLaunch": false,
676
+ "category": "Compute optimized",
677
+ "gpuNum": 0,
678
+ "hideHardwareSpecs": false,
679
+ "memoryGiB": 72,
680
+ "name": "ml.c5.9xlarge",
681
+ "vcpuNum": 36
682
+ },
683
+ {
684
+ "_defaultOrder": 26,
685
+ "_isFastLaunch": false,
686
+ "category": "Compute optimized",
687
+ "gpuNum": 0,
688
+ "hideHardwareSpecs": false,
689
+ "memoryGiB": 96,
690
+ "name": "ml.c5.12xlarge",
691
+ "vcpuNum": 48
692
+ },
693
+ {
694
+ "_defaultOrder": 27,
695
+ "_isFastLaunch": false,
696
+ "category": "Compute optimized",
697
+ "gpuNum": 0,
698
+ "hideHardwareSpecs": false,
699
+ "memoryGiB": 144,
700
+ "name": "ml.c5.18xlarge",
701
+ "vcpuNum": 72
702
+ },
703
+ {
704
+ "_defaultOrder": 28,
705
+ "_isFastLaunch": false,
706
+ "category": "Compute optimized",
707
+ "gpuNum": 0,
708
+ "hideHardwareSpecs": false,
709
+ "memoryGiB": 192,
710
+ "name": "ml.c5.24xlarge",
711
+ "vcpuNum": 96
712
+ },
713
+ {
714
+ "_defaultOrder": 29,
715
+ "_isFastLaunch": true,
716
+ "category": "Accelerated computing",
717
+ "gpuNum": 1,
718
+ "hideHardwareSpecs": false,
719
+ "memoryGiB": 16,
720
+ "name": "ml.g4dn.xlarge",
721
+ "vcpuNum": 4
722
+ },
723
+ {
724
+ "_defaultOrder": 30,
725
+ "_isFastLaunch": false,
726
+ "category": "Accelerated computing",
727
+ "gpuNum": 1,
728
+ "hideHardwareSpecs": false,
729
+ "memoryGiB": 32,
730
+ "name": "ml.g4dn.2xlarge",
731
+ "vcpuNum": 8
732
+ },
733
+ {
734
+ "_defaultOrder": 31,
735
+ "_isFastLaunch": false,
736
+ "category": "Accelerated computing",
737
+ "gpuNum": 1,
738
+ "hideHardwareSpecs": false,
739
+ "memoryGiB": 64,
740
+ "name": "ml.g4dn.4xlarge",
741
+ "vcpuNum": 16
742
+ },
743
+ {
744
+ "_defaultOrder": 32,
745
+ "_isFastLaunch": false,
746
+ "category": "Accelerated computing",
747
+ "gpuNum": 1,
748
+ "hideHardwareSpecs": false,
749
+ "memoryGiB": 128,
750
+ "name": "ml.g4dn.8xlarge",
751
+ "vcpuNum": 32
752
+ },
753
+ {
754
+ "_defaultOrder": 33,
755
+ "_isFastLaunch": false,
756
+ "category": "Accelerated computing",
757
+ "gpuNum": 4,
758
+ "hideHardwareSpecs": false,
759
+ "memoryGiB": 192,
760
+ "name": "ml.g4dn.12xlarge",
761
+ "vcpuNum": 48
762
+ },
763
+ {
764
+ "_defaultOrder": 34,
765
+ "_isFastLaunch": false,
766
+ "category": "Accelerated computing",
767
+ "gpuNum": 1,
768
+ "hideHardwareSpecs": false,
769
+ "memoryGiB": 256,
770
+ "name": "ml.g4dn.16xlarge",
771
+ "vcpuNum": 64
772
+ },
773
+ {
774
+ "_defaultOrder": 35,
775
+ "_isFastLaunch": false,
776
+ "category": "Accelerated computing",
777
+ "gpuNum": 1,
778
+ "hideHardwareSpecs": false,
779
+ "memoryGiB": 61,
780
+ "name": "ml.p3.2xlarge",
781
+ "vcpuNum": 8
782
+ },
783
+ {
784
+ "_defaultOrder": 36,
785
+ "_isFastLaunch": false,
786
+ "category": "Accelerated computing",
787
+ "gpuNum": 4,
788
+ "hideHardwareSpecs": false,
789
+ "memoryGiB": 244,
790
+ "name": "ml.p3.8xlarge",
791
+ "vcpuNum": 32
792
+ },
793
+ {
794
+ "_defaultOrder": 37,
795
+ "_isFastLaunch": false,
796
+ "category": "Accelerated computing",
797
+ "gpuNum": 8,
798
+ "hideHardwareSpecs": false,
799
+ "memoryGiB": 488,
800
+ "name": "ml.p3.16xlarge",
801
+ "vcpuNum": 64
802
+ },
803
+ {
804
+ "_defaultOrder": 38,
805
+ "_isFastLaunch": false,
806
+ "category": "Accelerated computing",
807
+ "gpuNum": 8,
808
+ "hideHardwareSpecs": false,
809
+ "memoryGiB": 768,
810
+ "name": "ml.p3dn.24xlarge",
811
+ "vcpuNum": 96
812
+ },
813
+ {
814
+ "_defaultOrder": 39,
815
+ "_isFastLaunch": false,
816
+ "category": "Memory Optimized",
817
+ "gpuNum": 0,
818
+ "hideHardwareSpecs": false,
819
+ "memoryGiB": 16,
820
+ "name": "ml.r5.large",
821
+ "vcpuNum": 2
822
+ },
823
+ {
824
+ "_defaultOrder": 40,
825
+ "_isFastLaunch": false,
826
+ "category": "Memory Optimized",
827
+ "gpuNum": 0,
828
+ "hideHardwareSpecs": false,
829
+ "memoryGiB": 32,
830
+ "name": "ml.r5.xlarge",
831
+ "vcpuNum": 4
832
+ },
833
+ {
834
+ "_defaultOrder": 41,
835
+ "_isFastLaunch": false,
836
+ "category": "Memory Optimized",
837
+ "gpuNum": 0,
838
+ "hideHardwareSpecs": false,
839
+ "memoryGiB": 64,
840
+ "name": "ml.r5.2xlarge",
841
+ "vcpuNum": 8
842
+ },
843
+ {
844
+ "_defaultOrder": 42,
845
+ "_isFastLaunch": false,
846
+ "category": "Memory Optimized",
847
+ "gpuNum": 0,
848
+ "hideHardwareSpecs": false,
849
+ "memoryGiB": 128,
850
+ "name": "ml.r5.4xlarge",
851
+ "vcpuNum": 16
852
+ },
853
+ {
854
+ "_defaultOrder": 43,
855
+ "_isFastLaunch": false,
856
+ "category": "Memory Optimized",
857
+ "gpuNum": 0,
858
+ "hideHardwareSpecs": false,
859
+ "memoryGiB": 256,
860
+ "name": "ml.r5.8xlarge",
861
+ "vcpuNum": 32
862
+ },
863
+ {
864
+ "_defaultOrder": 44,
865
+ "_isFastLaunch": false,
866
+ "category": "Memory Optimized",
867
+ "gpuNum": 0,
868
+ "hideHardwareSpecs": false,
869
+ "memoryGiB": 384,
870
+ "name": "ml.r5.12xlarge",
871
+ "vcpuNum": 48
872
+ },
873
+ {
874
+ "_defaultOrder": 45,
875
+ "_isFastLaunch": false,
876
+ "category": "Memory Optimized",
877
+ "gpuNum": 0,
878
+ "hideHardwareSpecs": false,
879
+ "memoryGiB": 512,
880
+ "name": "ml.r5.16xlarge",
881
+ "vcpuNum": 64
882
+ },
883
+ {
884
+ "_defaultOrder": 46,
885
+ "_isFastLaunch": false,
886
+ "category": "Memory Optimized",
887
+ "gpuNum": 0,
888
+ "hideHardwareSpecs": false,
889
+ "memoryGiB": 768,
890
+ "name": "ml.r5.24xlarge",
891
+ "vcpuNum": 96
892
+ },
893
+ {
894
+ "_defaultOrder": 47,
895
+ "_isFastLaunch": false,
896
+ "category": "Accelerated computing",
897
+ "gpuNum": 1,
898
+ "hideHardwareSpecs": false,
899
+ "memoryGiB": 16,
900
+ "name": "ml.g5.xlarge",
901
+ "vcpuNum": 4
902
+ },
903
+ {
904
+ "_defaultOrder": 48,
905
+ "_isFastLaunch": false,
906
+ "category": "Accelerated computing",
907
+ "gpuNum": 1,
908
+ "hideHardwareSpecs": false,
909
+ "memoryGiB": 32,
910
+ "name": "ml.g5.2xlarge",
911
+ "vcpuNum": 8
912
+ },
913
+ {
914
+ "_defaultOrder": 49,
915
+ "_isFastLaunch": false,
916
+ "category": "Accelerated computing",
917
+ "gpuNum": 1,
918
+ "hideHardwareSpecs": false,
919
+ "memoryGiB": 64,
920
+ "name": "ml.g5.4xlarge",
921
+ "vcpuNum": 16
922
+ },
923
+ {
924
+ "_defaultOrder": 50,
925
+ "_isFastLaunch": false,
926
+ "category": "Accelerated computing",
927
+ "gpuNum": 1,
928
+ "hideHardwareSpecs": false,
929
+ "memoryGiB": 128,
930
+ "name": "ml.g5.8xlarge",
931
+ "vcpuNum": 32
932
+ },
933
+ {
934
+ "_defaultOrder": 51,
935
+ "_isFastLaunch": false,
936
+ "category": "Accelerated computing",
937
+ "gpuNum": 1,
938
+ "hideHardwareSpecs": false,
939
+ "memoryGiB": 256,
940
+ "name": "ml.g5.16xlarge",
941
+ "vcpuNum": 64
942
+ },
943
+ {
944
+ "_defaultOrder": 52,
945
+ "_isFastLaunch": false,
946
+ "category": "Accelerated computing",
947
+ "gpuNum": 4,
948
+ "hideHardwareSpecs": false,
949
+ "memoryGiB": 192,
950
+ "name": "ml.g5.12xlarge",
951
+ "vcpuNum": 48
952
+ },
953
+ {
954
+ "_defaultOrder": 53,
955
+ "_isFastLaunch": false,
956
+ "category": "Accelerated computing",
957
+ "gpuNum": 4,
958
+ "hideHardwareSpecs": false,
959
+ "memoryGiB": 384,
960
+ "name": "ml.g5.24xlarge",
961
+ "vcpuNum": 96
962
+ },
963
+ {
964
+ "_defaultOrder": 54,
965
+ "_isFastLaunch": false,
966
+ "category": "Accelerated computing",
967
+ "gpuNum": 8,
968
+ "hideHardwareSpecs": false,
969
+ "memoryGiB": 768,
970
+ "name": "ml.g5.48xlarge",
971
+ "vcpuNum": 192
972
+ }
973
+ ],
974
+ "instance_type": "ml.t3.medium",
975
+ "kernelspec": {
976
+ "display_name": "conda_pytorch_p39",
977
+ "language": "python",
978
+ "name": "conda_pytorch_p39"
979
+ },
980
+ "language_info": {
981
+ "codemirror_mode": {
982
+ "name": "ipython",
983
+ "version": 3
984
+ },
985
+ "file_extension": ".py",
986
+ "mimetype": "text/x-python",
987
+ "name": "python",
988
+ "nbconvert_exporter": "python",
989
+ "pygments_lexer": "ipython3",
990
+ "version": "3.9.15"
991
+ }
992
+ },
993
+ "nbformat": 4,
994
+ "nbformat_minor": 5
995
+ }
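One note on the deployment cell above: it starts each thread and joins it immediately inside the same loop, so the model deployments actually run one after another. If concurrent deployment is wanted, a sketch like the following would start all threads before joining; it reuses the notebook's own `start_process` and `model_configs` names (the commented-out thread block at the bottom of that cell follows the same pattern), so it only runs in the notebook's context.

```python
from threading import Thread

# Start every deployment first, then wait for all of them,
# so the models in model_configs deploy concurrently.
threads = [Thread(target=start_process, args=(config,)) for config in model_configs]
for thread in threads:
    thread.start()
for thread in threads:
    thread.join()
```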
handler.py ADDED
@@ -0,0 +1,11 @@
+ from typing import Any, Dict, List
+
+ from inference import model_fn, predict_fn
+
+
+ class EndpointHandler:
+     def __init__(self, path=""):
+         return model_fn(path)
+
+     def __call__(self, data: Any) -> List[List[Dict[str, float]]]:
+         return predict_fn(data, None)
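`handler.py` adapts the SageMaker-style `model_fn`/`predict_fn` pair to the custom-handler interface used by Hugging Face Inference Endpoints. A hypothetical local smoke test might look like the sketch below; the payload keys are assumptions (the fields `Task` actually parses live in `internals/data/task.py`), and the model path is only an example.

```python
# Hedged local usage sketch of the Hugging Face endpoint handler.
from handler import EndpointHandler

handler = EndpointHandler(path="/opt/ml/model")  # loads every pipeline once via model_fn
example_payload = {
    "task_type": "GENERATE_AI_IMAGE",  # assumed field name, see internals/data/task.py
    "prompt": "a comic hero standing on a rooftop",
}
result = handler(example_payload)  # __call__ routes to predict_fn(data, None)
print(result)
```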
inference.py ADDED
@@ -0,0 +1,341 @@
+ from typing import List, Optional
+
+ import torch
+
+ from internals.data.dataAccessor import update_db
+ from internals.data.task import Task, TaskType
+ from internals.pipelines.commons import Img2Img, Text2Img
+ from internals.pipelines.controlnets import ControlNet
+ from internals.pipelines.img_classifier import ImageClassifier
+ from internals.pipelines.img_to_text import Image2Text
+ from internals.pipelines.prompt_modifier import PromptModifier
+ from internals.pipelines.safety_checker import SafetyChecker
+ from internals.util.args import apply_style_args
+ from internals.util.avatar import Avatar
+ from internals.util.cache import auto_clear_cuda_and_gc
+ from internals.util.commons import pickPoses, upload_image, upload_images
+ from internals.util.config import set_configs_from_task, set_root_dir
+ from internals.util.failure_hander import FailureHandler
+ from internals.util.lora_style import LoraStyle
+ from internals.util.slack import Slack
+
+ torch.backends.cudnn.benchmark = True
+ torch.backends.cuda.matmul.allow_tf32 = True
+
+ num_return_sequences = 4  # the number of results to generate
+ auto_mode = False
+
+ prompt_modifier = PromptModifier(num_of_sequences=num_return_sequences)
+ img2text = Image2Text()
+ img_classifier = ImageClassifier()
+ controlnet = ControlNet()
+ lora_style = LoraStyle()
+ text2img_pipe = Text2Img()
+ img2img_pipe = Img2Img()
+ safety_checker = SafetyChecker()
+ slack = Slack()
+ avatar = Avatar()
+
+
+ def get_patched_prompt(task: Task):
+     def add_style_and_character(prompt: List[str], additional: Optional[str] = None):
+         for i in range(len(prompt)):
+             prompt[i] = avatar.add_code_names(prompt[i])
+             prompt[i] = lora_style.prepend_style_to_prompt(prompt[i], task.get_style())
+             if additional:
+                 prompt[i] = additional + " " + prompt[i]
+
+     prompt = task.get_prompt()
+
+     if task.is_prompt_engineering():
+         prompt = prompt_modifier.modify(prompt)
+     else:
+         prompt = [prompt] * num_return_sequences
+
+     ori_prompt = [task.get_prompt()] * num_return_sequences
+
+     class_name = None
+     # if task.get_imageUrl():
+     #     class_name = img_classifier.classify(
+     #         task.get_imageUrl(), task.get_width(), task.get_height()
+     #     )
+     add_style_and_character(ori_prompt, class_name)
+     add_style_and_character(prompt, class_name)
+
+     print({"prompts": prompt})
+
+     return (prompt, ori_prompt)
+
+
+ def get_patched_prompt_tile_upscale(task: Task):
+     if task.get_prompt():
+         prompt = task.get_prompt()
+     else:
+         prompt = img2text.process(task.get_imageUrl())
+
+     prompt = avatar.add_code_names(prompt)
+     prompt = lora_style.prepend_style_to_prompt(prompt, task.get_style())
+
+     class_name = img_classifier.classify(
+         task.get_imageUrl(), task.get_width(), task.get_height()
+     )
+     prompt = class_name + " " + prompt
+
+     print({"prompt": prompt})
+
+     return prompt
+
+
+ @update_db
+ @auto_clear_cuda_and_gc(controlnet)
+ @slack.auto_send_alert
+ def canny(task: Task):
+     prompt, _ = get_patched_prompt(task)
+
+     controlnet.load_canny()
+
+     # pipe2 is used for canny and pose
+     lora_patcher = lora_style.get_patcher(controlnet.pipe2, task.get_style())
+     lora_patcher.patch()
+
+     images, has_nsfw = controlnet.process_canny(
+         prompt=prompt,
+         imageUrl=task.get_imageUrl(),
+         seed=task.get_seed(),
+         steps=task.get_steps(),
+         width=task.get_width(),
+         height=task.get_height(),
+         guidance_scale=task.get_cy_guidance_scale(),
+         negative_prompt=[
+             f"monochrome, neon, x-ray, negative image, oversaturated, {task.get_negative_prompt()}"
+         ]
+         * num_return_sequences,
+         **lora_patcher.kwargs(),
+     )
+
+     generated_image_urls = upload_images(images, "_canny", task.get_taskId())
+
+     lora_patcher.cleanup()
+     controlnet.cleanup()
+
+     return {
+         "modified_prompts": prompt,
+         "generated_image_urls": generated_image_urls,
+         "has_nsfw": has_nsfw,
+     }
+
+
+ @update_db
+ @auto_clear_cuda_and_gc(controlnet)
+ @slack.auto_send_alert
+ def tile_upscale(task: Task):
+     output_key = "crecoAI/{}_tile_upscaler.png".format(task.get_taskId())
+
+     prompt = get_patched_prompt_tile_upscale(task)
+
+     controlnet.load_tile_upscaler()
+
+     lora_patcher = lora_style.get_patcher(controlnet.pipe, task.get_style())
+     lora_patcher.patch()
+
+     images, has_nsfw = controlnet.process_tile_upscaler(
+         imageUrl=task.get_imageUrl(),
+         seed=task.get_seed(),
+         steps=task.get_steps(),
+         width=task.get_width(),
+         height=task.get_height(),
+         prompt=prompt,
+         resize_dimension=task.get_resize_dimension(),
+         negative_prompt=task.get_negative_prompt(),
+         guidance_scale=task.get_ti_guidance_scale(),
+     )
+
+     generated_image_url = upload_image(images[0], output_key)
+
+     lora_patcher.cleanup()
+     controlnet.cleanup()
+
+     return {
+         "modified_prompts": prompt,
+         "generated_image_url": generated_image_url,
+         "has_nsfw": has_nsfw,
+     }
+
+
+ @update_db
+ @auto_clear_cuda_and_gc(controlnet)
+ @slack.auto_send_alert
+ def pose(task: Task, s3_outkey: str = "_pose", poses: Optional[list] = None):
+     prompt, _ = get_patched_prompt(task)
+
+     controlnet.load_pose()
+
+     # pipe2 is used for canny and pose
+     lora_patcher = lora_style.get_patcher(controlnet.pipe2, task.get_style())
+     lora_patcher.patch()
+
+     if poses is None:
+         poses = [controlnet.detect_pose(task.get_imageUrl())] * num_return_sequences
+
+     images, has_nsfw = controlnet.process_pose(
+         prompt=prompt,
+         image=poses,
+         seed=task.get_seed(),
+         steps=task.get_steps(),
+         negative_prompt=[task.get_negative_prompt()] * num_return_sequences,
+         width=task.get_width(),
+         height=task.get_height(),
+         guidance_scale=task.get_po_guidance_scale(),
+         **lora_patcher.kwargs(),
+     )
+
+     generated_image_urls = upload_images(images, s3_outkey, task.get_taskId())
+
+     lora_patcher.cleanup()
+     controlnet.cleanup()
+
+     return {
+         "modified_prompts": prompt,
+         "generated_image_urls": generated_image_urls,
+         "has_nsfw": has_nsfw,
+     }
+
+
+ @update_db
+ @auto_clear_cuda_and_gc(controlnet)
+ @slack.auto_send_alert
+ def text2img(task: Task):
+     prompt, ori_prompt = get_patched_prompt(task)
+
+     lora_patcher = lora_style.get_patcher(text2img_pipe.pipe, task.get_style())
+     lora_patcher.patch()
+
+     torch.manual_seed(task.get_seed())
+
+     images, has_nsfw = text2img_pipe.process(
+         prompt=ori_prompt,
+         modified_prompts=prompt,
+         num_inference_steps=task.get_steps(),
+         guidance_scale=7.5,
+         height=task.get_height(),
+         width=task.get_width(),
+         negative_prompt=[task.get_negative_prompt()] * num_return_sequences,
+         iteration=task.get_iteration(),
+         **lora_patcher.kwargs(),
+     )
+
+     generated_image_urls = upload_images(images, "", task.get_taskId())
+
+     lora_patcher.cleanup()
+
+     return {
+         "modified_prompts": prompt,
+         "generated_image_urls": generated_image_urls,
+         "has_nsfw": has_nsfw,
+     }
+
+
+ @update_db
+ @auto_clear_cuda_and_gc(controlnet)
+ @slack.auto_send_alert
+ def img2img(task: Task):
+     prompt, _ = get_patched_prompt(task)
+
+     lora_patcher = lora_style.get_patcher(img2img_pipe.pipe, task.get_style())
+     lora_patcher.patch()
+
+     torch.manual_seed(task.get_seed())
+
+     images, has_nsfw = img2img_pipe.process(
+         prompt=prompt,
+         imageUrl=task.get_imageUrl(),
+         negative_prompt=[task.get_negative_prompt()] * num_return_sequences,
+         steps=task.get_steps(),
+         width=task.get_width(),
+         height=task.get_height(),
+         strength=task.get_i2i_strength(),
+         guidance_scale=task.get_i2i_guidance_scale(),
+         **lora_patcher.kwargs(),
+     )
+
+     generated_image_urls = upload_images(images, "_imgtoimg", task.get_taskId())
+
+     lora_patcher.cleanup()
+
+     return {
+         "modified_prompts": prompt,
+         "generated_image_urls": generated_image_urls,
+         "has_nsfw": has_nsfw,
+     }
+
+
+ def model_fn(model_dir):
+     print("Logs: model loaded .... starts")
+
+     set_root_dir(__file__)
+
+     FailureHandler.register()
+
+     avatar.load_local()
+
+     prompt_modifier.load()
+     img2text.load()
+     img_classifier.load()
+
+     lora_style.load(model_dir)
+     safety_checker.load()
+
+     controlnet.load(model_dir)
+     text2img_pipe.load(model_dir)
+     img2img_pipe.create(text2img_pipe)
+
+     safety_checker.apply(text2img_pipe)
+     safety_checker.apply(img2img_pipe)
+     safety_checker.apply(controlnet)
+
+     print("Logs: model loaded ....")
+     return
+
+
+ @FailureHandler.clear
+ def predict_fn(data, pipe):
+     task = Task(data)
+     print("task is ", data)
+
+     FailureHandler.handle(task)
+
+     try:
+         # Set set_environment
+         set_configs_from_task(task)
+
+         # Apply arguments
+         apply_style_args(data)
+
+         # Re-fetch styles
+         lora_style.fetch_styles()
+
+         # Fetch avatars
+         avatar.fetch_from_network(task.get_model_id())
+
+         task_type = task.get_type()
+
+         if task_type == TaskType.TEXT_TO_IMAGE:
+             # character sheet
+             if "character sheet" in task.get_prompt().lower():
+                 return pose(task, s3_outkey="", poses=pickPoses())
+             else:
+                 return text2img(task)
+         elif task_type == TaskType.IMAGE_TO_IMAGE:
+             return img2img(task)
+         elif task_type == TaskType.CANNY:
+             return canny(task)
+         elif task_type == TaskType.POSE:
+             return pose(task)
+         elif task_type == TaskType.TILE_UPSCALE:
+             return tile_upscale(task)
+         else:
+             raise Exception("Invalid task type")
+     except Exception as e:
+         print(f"Error: {e}")
+         slack.error_alert(task, e)
+         return None
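`predict_fn` above dispatches on `TaskType` and routes text-to-image prompts containing "character sheet" through `pose()` with preset poses. A hedged sketch of calling a deployed endpoint follows; the endpoint name is illustrative and the payload keys are assumptions (the exact fields `Task` reads are defined in `internals/data/task.py`).

```python
# Hypothetical invocation of an endpoint created by deployment.ipynb.
from sagemaker.huggingface.model import HuggingFacePredictor

predictor = HuggingFacePredictor(endpoint_name="gamma-10000-2023-05-16-14-55")
response = predictor.predict(
    {
        "task_type": "GENERATE_AI_IMAGE",  # assumed field name
        "prompt": "a young astronaut exploring a neon city",
        "seed": 42,
    }
)
# Expected shape per the handlers above:
# {"modified_prompts": [...], "generated_image_urls": [...], "has_nsfw": ...}
print(response)
```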
inference2.py ADDED
@@ -0,0 +1,169 @@
+ from io import BytesIO
+
+ import torch
+
+ from internals.data.dataAccessor import update_db
+ from internals.data.task import ModelType, Task, TaskType
+ from internals.pipelines.inpainter import InPainter
+ from internals.pipelines.object_remove import ObjectRemoval
+ from internals.pipelines.prompt_modifier import PromptModifier
+ from internals.pipelines.remove_background import RemoveBackground
+ from internals.pipelines.safety_checker import SafetyChecker
+ from internals.pipelines.upscaler import Upscaler
+ from internals.util.avatar import Avatar
+ from internals.util.cache import clear_cuda
+ from internals.util.commons import (construct_default_s3_url, upload_image,
+                                     upload_images)
+ from internals.util.config import set_configs_from_task, set_root_dir
+ from internals.util.failure_hander import FailureHandler
+ from internals.util.slack import Slack
+
+ torch.backends.cudnn.benchmark = True
+ torch.backends.cuda.matmul.allow_tf32 = True
+
+ num_return_sequences = 4
+ auto_mode = False
+
+ slack = Slack()
+
+ prompt_modifier = PromptModifier(num_of_sequences=num_return_sequences)
+ upscaler = Upscaler()
+ inpainter = InPainter()
+ safety_checker = SafetyChecker()
+ object_removal = ObjectRemoval()
+ avatar = Avatar()
+
+
+ @update_db
+ @slack.auto_send_alert
+ def remove_bg(task: Task):
+     remove_background = RemoveBackground()
+     output_image = remove_background.remove(task.get_imageUrl())
+
+     output_key = "crecoAI/{}_rmbg.png".format(task.get_taskId())
+     upload_image(output_image, output_key)
+
+     return {"generated_image_url": construct_default_s3_url(output_key)}
+
+
+ @update_db
+ @slack.auto_send_alert
+ def inpaint(task: Task):
+     prompt = avatar.add_code_names(task.get_prompt())
+     if task.is_prompt_engineering():
+         prompt = prompt_modifier.modify(prompt)
+     else:
+         prompt = [prompt] * num_return_sequences
+
+     print({"prompts": prompt})
+
+     images = inpainter.process(
+         prompt=prompt,
+         image_url=task.get_imageUrl(),
+         mask_image_url=task.get_maskImageUrl(),
+         width=task.get_width(),
+         height=task.get_height(),
+         seed=task.get_seed(),
+         negative_prompt=[task.get_negative_prompt()] * num_return_sequences,
+     )
+     generated_image_urls = upload_images(images, "_inpaint", task.get_taskId())
+
+     clear_cuda()
+
+     return {"modified_prompts": prompt, "generated_image_urls": generated_image_urls}
+
+
+ @update_db
+ @slack.auto_send_alert
+ def remove_object(task: Task):
+     output_key = "crecoAI/{}_object_remove.png".format(task.get_taskId())
+
+     images = object_removal.process(
+         image_url=task.get_imageUrl(),
+         mask_image_url=task.get_maskImageUrl(),
+         seed=task.get_seed(),
+         width=task.get_width(),
+         height=task.get_height(),
+     )
+     generated_image_urls = upload_image(images[0], output_key)
+
+     clear_cuda()
+
+     return {"generated_image_urls": generated_image_urls}
+
+
+ @update_db
+ @slack.auto_send_alert
+ def upscale_image(task: Task):
+     output_key = "crecoAI/{}_upscale.png".format(task.get_taskId())
+     out_img = None
+     if task.get_modelType() == ModelType.ANIME:
+         print("Using Anime model")
+         out_img = upscaler.upscale_anime(
+             image=task.get_imageUrl(), resize_dimension=task.get_resize_dimension()
+         )
+     else:
+         print("Using Real model")
+         out_img = upscaler.upscale(
+             image=task.get_imageUrl(), resize_dimension=task.get_resize_dimension()
+         )
+
+     upload_image(BytesIO(out_img), output_key)
+     return {"generated_image_url": construct_default_s3_url(output_key)}
+
+
+ def model_fn(model_dir):
+     print("Logs: model loaded .... starts")
+
+     set_root_dir(__file__)
+
+     FailureHandler.register()
+
+     avatar.load_local()
+
+     prompt_modifier.load()
+     safety_checker.load()
+
+     object_removal.load(model_dir)
+     upscaler.load()
+     inpainter.load()
+
+     safety_checker.apply(inpainter)
+
+     print("Logs: model loaded ....")
+     return
+
+
+ @FailureHandler.clear
+ def predict_fn(data, pipe):
+     task = Task(data)
+     print("task is ", data)
+
+     FailureHandler.handle(task)
+
+     # Set set_environment
+     set_configs_from_task(task)
+
+     try:
+         # Set set_environment
+         set_configs_from_task(task)
+
+         # Fetch avatars
+         avatar.fetch_from_network(task.get_model_id())
+
+         task_type = task.get_type()
+
+         if task_type == TaskType.REMOVE_BG:
+             return remove_bg(task)
+         elif task_type == TaskType.INPAINT:
+             return inpaint(task)
+         elif task_type == TaskType.UPSCALE_IMAGE:
+             return upscale_image(task)
+         elif task_type == TaskType.OBJECT_REMOVAL:
+             return remove_object(task)
+         else:
+             raise Exception("Invalid task type")
+     except Exception as e:
+         print(f"Error: {e}")
+         slack.error_alert(task, e)
+         return None
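`inference2.py` is the variant that `deployment.ipynb` renames to `inference.py` when a config sets `"inference_2": True`; it serves background removal, inpainting, upscaling, and object removal. A hedged sketch of driving it directly is below; the payload field names and the `"REMOVE_BG"` string are assumptions (the actual keys and enum values live in `internals/data/task.py`), and the model path is only an example.

```python
# Hypothetical direct use of the inference2.py entry points.
import inference2

inference2.model_fn("/opt/ml/model")  # loads upscaler, inpainter, object removal, etc.
result = inference2.predict_fn(
    {
        "task_type": "REMOVE_BG",  # assumed field name and value
        "imageUrl": "https://example.com/input.png",
        "taskId": "demo-task-1",
    },
    None,
)
# remove_bg() uploads crecoAI/<taskId>_rmbg.png and returns its S3 URL.
print(result)
```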
internals/__init__.py ADDED
File without changes
internals/data/__init__.py ADDED
File without changes
internals/data/dataAccessor.py ADDED
@@ -0,0 +1,104 @@
1
+ import traceback
2
+ from typing import Dict, List, Optional
3
+
4
+ import requests
5
+ from pydash import includes
6
+
7
+ from internals.data.task import Task
8
+ from internals.util.config import api_endpoint, api_headers
9
+ from internals.util.slack import Slack
10
+
11
+
12
+ def updateSource(sourceId, userId, state):
13
+ print("update source is called")
14
+ url = api_endpoint() + f"/comic-crecoai/source/{sourceId}"
15
+ headers = {
16
+ "Content-Type": "application/json",
17
+ "user-id": str(userId),
18
+ **api_headers(),
19
+ }
20
+
21
+ data = {"state": state}
22
+
23
+ try:
24
+ response = requests.patch(url, headers=headers, json=data, timeout=10)
25
+ print("update source response", response)
26
+ except requests.exceptions.Timeout:
27
+ print("Request timed out while updating source")
28
+ except requests.exceptions.RequestException as e:
29
+ print(f"Error while updating source: {e}")
30
+
31
+ return
32
+
33
+
34
+ def saveGeneratedImages(sourceId, userId, has_nsfw: bool):
35
+ print("save generation called")
36
+ url = api_endpoint() + "/comic-crecoai/source/" + str(sourceId) + "/generatedImages"
37
+ headers = {
38
+ "Content-Type": "application/json",
39
+ "user-id": str(userId),
40
+ **api_headers(),
41
+ }
42
+ data = {"state": "ACTIVE", "has_nsfw": has_nsfw}
43
+
44
+ try:
45
+ requests.patch(url, headers=headers, json=data)
46
+ # print("save generation response", response)
47
+ except requests.exceptions.Timeout:
48
+ print("Request timed out while saving image")
49
+ except requests.exceptions.RequestException as e:
50
+ print("Failed to mark source as active: ", e)
51
+ return
52
+ return
53
+
54
+
55
+ def getStyles() -> Optional[Dict]:
56
+ url = api_endpoint() + "/comic-crecoai/style"
57
+ try:
58
+ response = requests.get(
59
+ url,
60
+ timeout=10,
61
+ headers={"x-api-key": "kGyEMp)oHB(zf^E5>-{o]I%go", **api_headers()},
62
+ )
63
+ return response.json()
64
+ except requests.exceptions.Timeout:
65
+ print("Request timed out while fetching styles")
66
+ except requests.exceptions.RequestException as e:
67
+ print(f"Error while fetching styles: {e}")
68
+ return None
69
+
70
+
71
+ def getCharacters(model_id: str) -> Optional[List]:
72
+ url = api_endpoint() + "/comic-crecoai/model/{}".format(model_id)
73
+ try:
74
+ response = requests.get(url, timeout=10, headers=api_headers())
75
+ response = response.json()
76
+ response = response["data"]["characters"]
77
+ return response
78
+ except requests.exceptions.Timeout:
79
+ print("Request timed out while fetching characters")
80
+ except Exception as e:
81
+ print(f"Error while fetching characters: {e}")
82
+ return None
83
+
84
+
85
+ def update_db(func):
86
+ def caller(*args, **kwargs):
87
+ if type(args[0]) is not Task:
88
+ raise Exception("First argument must be a Task object")
89
+ task = args[0]
90
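+ # Lifecycle: mark the source INPROGRESS, run the wrapped task, then mark it COMPLETED (or FAILED if it raises)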
+ try:
91
+ updateSource(task.get_sourceId(), task.get_userId(), "INPROGRESS")
92
+ rargs = func(*args, **kwargs)
93
+ has_nsfw = rargs.get("has_nsfw", False)
94
+ updateSource(task.get_sourceId(), task.get_userId(), "COMPLETED")
95
+ saveGeneratedImages(task.get_sourceId(), task.get_userId(), has_nsfw)
96
+ return rargs
97
+ except Exception as e:
98
+ print("Error processing image: {}".format(str(e)))
99
+ traceback.print_exc()
100
+ slack = Slack()
101
+ slack.error_alert(task, e)
102
+ updateSource(task.get_sourceId(), task.get_userId(), "FAILED")
103
+
104
+ return caller
internals/data/result.py ADDED
@@ -0,0 +1,19 @@
1
+ from internals.util.config import get_nsfw_access
2
+
3
+
4
+ class Result:
5
+ images, nsfw = None, None
6
+
7
+ def __init__(self, images, nsfw):
8
+ self.images = images
9
+ self.nsfw = nsfw
10
+
11
+ @staticmethod
12
+ def from_result(result):
13
+ has_nsfw = result.nsfw_content_detected
14
+ if has_nsfw and isinstance(has_nsfw, list):
15
+ has_nsfw = any(has_nsfw)
16
+
17
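+ # Only flag NSFW content when the caller does not have NSFW access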
+ has_nsfw = (not get_nsfw_access()) and has_nsfw
18
+ return (result.images, bool(has_nsfw))
19
+ # return Result(result.images, result.has_nsfw_concepts)
internals/data/task.py ADDED
@@ -0,0 +1,125 @@
1
+ from enum import Enum
2
+ from typing import Union
3
+
4
+ import numpy as np
5
+
6
+
7
+ class TaskType(Enum):
8
+ TEXT_TO_IMAGE = "GENERATE_AI_IMAGE"
9
+ IMAGE_TO_IMAGE = "IMAGE_TO_IMAGE"
10
+ POSE = "POSE"
11
+ CANNY = "CANNY"
12
+ REMOVE_BG = "REMOVE_BG"
13
+ INPAINT = "INPAINT"
14
+ UPSCALE_IMAGE = "UPSCALE_IMAGE"
15
+ TILE_UPSCALE = "TILE_UPSCALE"
16
+ OBJECT_REMOVAL = "OBJECT_REMOVAL"
17
+
18
+
19
+ class ModelType(Enum):
20
+ REAL = 10000
21
+ ANIME = 10001
22
+ COMIC = 10002
23
+
24
+
25
+ class Task:
26
+ def __init__(self, data):
27
+ self.__data = data
28
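+ # Fall back to a random seed when the request omits one (or passes -1)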
+ if data.get("seed", -1) == None or self.get_seed() == -1:
29
+ self.__data["seed"] = np.random.randint(0, np.iinfo(np.int64).max)
30
+ prompt = data.get("prompt", "")
31
+ if prompt is None:
32
+ self.__data["prompt"] = ""
33
+ else:
34
+ self.__data["prompt"] = data.get("prompt", "")[:200]
35
+
36
+ def get_taskId(self) -> str:
37
+ return self.__data.get("task_id")
38
+
39
+ def get_sourceId(self) -> str:
40
+ return self.__data.get("source_id")
41
+
42
+ def get_imageUrl(self) -> str:
43
+ return self.__data.get("imageUrl", None)
44
+
45
+ def get_prompt(self) -> str:
46
+ return self.__data.get("prompt", "")
47
+
48
+ def get_userId(self) -> str:
49
+ return self.__data.get("userId", "")
50
+
51
+ def get_email(self) -> str:
52
+ return self.__data.get("email", "")
53
+
54
+ def get_style(self) -> str:
55
+ return self.__data.get("style", None)
56
+
57
+ def get_iteration(self) -> float:
58
+ return float(self.__data.get("iteration", 3.0))
59
+
60
+ def get_modelType(self) -> ModelType:
61
+ id = self.get_model_id()
62
+ return ModelType(id)
63
+
64
+ def get_model_id(self) -> int:
65
+ return int(self.__data.get("modelId", 10000))
66
+
67
+ def get_width(self) -> int:
68
+ return int(self.__data.get("width", 512))
69
+
70
+ def get_height(self) -> int:
71
+ return int(self.__data.get("height", 512))
72
+
73
+ def get_seed(self) -> int:
74
+ return int(self.__data.get("seed", -1))
75
+
76
+ def get_steps(self) -> int:
77
+ return int(self.__data.get("steps", "75"))
78
+
79
+ def get_type(self) -> Union[TaskType, None]:
80
+ try:
81
+ return TaskType(self.__data.get("task_type"))
82
+ except ValueError:
83
+ return None
84
+
85
+ def get_maskImageUrl(self) -> str:
86
+ return self.__data.get("maskImageUrl")
87
+
88
+ def get_negative_prompt(self) -> str:
89
+ return self.__data.get("negative_prompt", "")
90
+
91
+ def is_prompt_engineering(self) -> bool:
92
+ return self.__data.get("auto_mode", True)
93
+
94
+ def get_queue_name(self) -> str:
95
+ return self.__data.get("queue_name", "")
96
+
97
+ def get_resize_dimension(self) -> int:
98
+ return self.__data.get("resize_dimension", 1024)
99
+
100
+ def get_ti_guidance_scale(self) -> float:
101
+ return self.__data.get("ti_guidance_scale", 7.5)
102
+
103
+ def get_i2i_guidance_scale(self) -> float:
104
+ return self.__data.get("i2i_guidance_scale", 7.5)
105
+
106
+ def get_i2i_strength(self) -> float:
107
+ return self.__data.get("i2i_strength", 0.75)
108
+
109
+ def get_cy_guidance_scale(self) -> float:
110
+ return self.__data.get("cy_guidance_scale", 9)
111
+
112
+ def get_po_guidance_scale(self) -> float:
113
+ return self.__data.get("po_guidance_scale", 7.5)
114
+
115
+ def get_nsfw_threshold(self) -> float:
116
+ return self.__data.get("nsfw_threshold", 0.03)
117
+
118
+ def can_access_nsfw(self) -> bool:
119
+ return self.__data.get("can_access_nsfw", False)
120
+
121
+ def get_access_token(self) -> str:
122
+ return self.__data.get("access_token", "")
123
+
124
+ def get_raw(self) -> dict:
125
+ return self.__data.copy()
internals/pipelines/commons.py ADDED
@@ -0,0 +1,119 @@
1
+ from typing import Any, Callable, Dict, List, Optional, Union
2
+
3
+ import torch
4
+ from diffusers import StableDiffusionImg2ImgPipeline
5
+
6
+ from internals.data.result import Result
7
+ from internals.pipelines.twoStepPipeline import two_step_pipeline
8
+ from internals.util.commons import disable_safety_checker, download_image
9
+
10
+
11
+ class AbstractPipeline:
12
+ def load(self, model_dir: str):
13
+ pass
14
+
15
+ def create(self, pipe):
16
+ pass
17
+
18
+
19
+ class Text2Img(AbstractPipeline):
20
+ def load(self, model_dir: str):
21
+ self.pipe = two_step_pipeline.from_pretrained(
22
+ model_dir, torch_dtype=torch.float16
23
+ ).to("cuda")
24
+ self.__patch()
25
+
26
+ def create(self, pipeline: AbstractPipeline):
27
+ self.pipe = two_step_pipeline(**pipeline.pipe.components).to("cuda")
28
+ self.__patch()
29
+
30
+ def __patch(self):
31
+ self.pipe.enable_xformers_memory_efficient_attention()
32
+
33
+ @torch.inference_mode()
34
+ def process(
35
+ self,
36
+ prompt: Union[str, List[str]] = None,
37
+ modified_prompts: Union[str, List[str]] = None,
38
+ height: Optional[int] = None,
39
+ width: Optional[int] = None,
40
+ num_inference_steps: int = 50,
41
+ guidance_scale: float = 7.5,
42
+ negative_prompt: Optional[Union[str, List[str]]] = None,
43
+ num_images_per_prompt: Optional[int] = 1,
44
+ eta: float = 0.0,
45
+ generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None,
46
+ latents: Optional[torch.FloatTensor] = None,
47
+ prompt_embeds: Optional[torch.FloatTensor] = None,
48
+ negative_prompt_embeds: Optional[torch.FloatTensor] = None,
49
+ output_type: Optional[str] = "pil",
50
+ return_dict: bool = True,
51
+ callback: Optional[Callable[[int, int, torch.FloatTensor], None]] = None,
52
+ callback_steps: int = 1,
53
+ cross_attention_kwargs: Optional[Dict[str, Any]] = None,
54
+ iteration: float = 3.0,
55
+ ):
56
+ result = self.pipe.two_step_pipeline(
57
+ prompt=prompt,
58
+ modified_prompts=modified_prompts,
59
+ height=height,
60
+ width=width,
61
+ num_inference_steps=num_inference_steps,
62
+ guidance_scale=guidance_scale,
63
+ negative_prompt=negative_prompt,
64
+ num_images_per_prompt=num_images_per_prompt,
65
+ eta=eta,
66
+ generator=generator,
67
+ latents=latents,
68
+ prompt_embeds=prompt_embeds,
69
+ negative_prompt_embeds=negative_prompt_embeds,
70
+ output_type=output_type,
71
+ return_dict=return_dict,
72
+ callback=callback,
73
+ callback_steps=callback_steps,
74
+ cross_attention_kwargs=cross_attention_kwargs,
75
+ iteration=iteration,
76
+ )
77
+ return Result.from_result(result)
78
+
79
+
80
+ class Img2Img(AbstractPipeline):
81
+ def load(self, model_dir: str):
82
+ self.pipe = StableDiffusionImg2ImgPipeline.from_pretrained(
83
+ model_dir, torch_dtype=torch.float16
84
+ ).to("cuda")
85
+ self.__patch()
86
+
87
+ def create(self, pipeline: AbstractPipeline):
88
+ self.pipe = StableDiffusionImg2ImgPipeline(**pipeline.pipe.components).to(
89
+ "cuda"
90
+ )
91
+ self.__patch()
92
+
93
+ def __patch(self):
94
+ self.pipe.enable_xformers_memory_efficient_attention()
95
+
96
+ @torch.inference_mode()
97
+ def process(
98
+ self,
99
+ prompt: List[str],
100
+ imageUrl: str,
101
+ negative_prompt: List[str],
102
+ strength: float,
103
+ guidance_scale: float,
104
+ steps: int,
105
+ width: int,
106
+ height: int,
107
+ ):
108
+ image = download_image(imageUrl).resize((width, height))
109
+
110
+ result = self.pipe.__call__(
111
+ prompt=prompt,
112
+ image=image,
113
+ strength=strength,
114
+ negative_prompt=negative_prompt,
115
+ guidance_scale=guidance_scale,
116
+ num_images_per_prompt=1,
117
+ num_inference_steps=steps,
118
+ )
119
+ return Result.from_result(result)
internals/pipelines/controlnets.py ADDED
@@ -0,0 +1,221 @@
1
+ from typing import List
2
+
3
+ import cv2
4
+ import numpy as np
5
+ import torch
6
+ from controlnet_aux import OpenposeDetector
7
+ from diffusers import (
8
+ ControlNetModel,
9
+ DiffusionPipeline,
10
+ StableDiffusionControlNetPipeline,
11
+ UniPCMultistepScheduler,
12
+ )
13
+ from PIL import Image
14
15
+
16
+ from internals.data.result import Result
17
+ from internals.pipelines.commons import AbstractPipeline
18
+ from internals.util.cache import clear_cuda_and_gc
19
+ from internals.util.commons import download_image
20
+
21
+
22
+ class ControlNet(AbstractPipeline):
23
+ __current_task_name = ""
24
+
25
+ def load(self, model_dir: str):
26
+ # we will load canny by default
27
+ self.load_canny()
28
+
29
+ # controlnet img2img pipeline (used by the tile upscaler)
30
+ pipe = DiffusionPipeline.from_pretrained(
31
+ model_dir,
32
+ controlnet=self.controlnet,
33
+ torch_dtype=torch.float16,
34
+ custom_pipeline="stable_diffusion_controlnet_img2img",
35
+ ).to("cuda")
36
+ pipe.scheduler = UniPCMultistepScheduler.from_config(pipe.scheduler.config)
37
+ pipe.enable_model_cpu_offload()
38
+ pipe.enable_xformers_memory_efficient_attention()
39
+ self.pipe = pipe
40
+
41
+ # controlnet text-to-image pipeline (used for canny and pose)
42
+ pipe2 = StableDiffusionControlNetPipeline(**pipe.components).to("cuda")
43
+ pipe2.scheduler = UniPCMultistepScheduler.from_config(pipe2.scheduler.config)
44
+ pipe2.enable_xformers_memory_efficient_attention()
45
+ self.pipe2 = pipe2
46
+
47
+ def load_canny(self):
48
+ if self.__current_task_name == "canny":
49
+ return
50
+ canny = ControlNetModel.from_pretrained(
51
+ "lllyasviel/control_v11p_sd15_canny", torch_dtype=torch.float16
52
+ ).to("cuda")
53
+ self.__current_task_name = "canny"
54
+ self.controlnet = canny
55
+ if hasattr(self, "pipe"):
56
+ self.pipe.controlnet = canny
57
+ if hasattr(self, "pipe2"):
58
+ self.pipe2.controlnet = canny
59
+ clear_cuda_and_gc()
60
+
61
+ def load_pose(self):
62
+ if self.__current_task_name == "pose":
63
+ return
64
+ pose = ControlNetModel.from_pretrained(
65
+ "lllyasviel/control_v11p_sd15_openpose", torch_dtype=torch.float16
66
+ ).to("cuda")
67
+ self.__current_task_name = "pose"
68
+ self.controlnet = pose
69
+ if hasattr(self, "pipe"):
70
+ self.pipe.controlnet = pose
71
+ if hasattr(self, "pipe2"):
72
+ self.pipe2.controlnet = pose
73
+ clear_cuda_and_gc()
74
+
75
+ def load_tile_upscaler(self):
76
+ if self.__current_task_name == "tile_upscaler":
77
+ return
78
+ tile_upscaler = ControlNetModel.from_pretrained(
79
+ "lllyasviel/control_v11f1e_sd15_tile", torch_dtype=torch.float16
80
+ ).to("cuda")
81
+ self.__current_task_name = "tile_upscaler"
82
+ self.controlnet = tile_upscaler
83
+ if hasattr(self, "pipe"):
84
+ self.pipe.controlnet = tile_upscaler
85
+ if hasattr(self, "pipe2"):
86
+ self.pipe2.controlnet = tile_upscaler
87
+ clear_cuda_and_gc()
88
+
89
+ def cleanup(self):
90
+ self.pipe.controlnet = None
91
+ self.pipe2.controlnet = None
92
+ self.controlnet = None
93
+ self.__current_task_name = ""
94
+
95
+ clear_cuda_and_gc()
96
+
97
+ @torch.inference_mode()
98
+ def process_canny(
99
+ self,
100
+ prompt: List[str],
101
+ imageUrl: str,
102
+ seed: int,
103
+ steps: int,
104
+ negative_prompt: List[str],
105
+ guidance_scale: float,
106
+ height: int,
107
+ width: int,
108
+ ):
109
+ if self.__current_task_name != "canny":
110
+ raise Exception("ControlNet is not loaded with canny model")
111
+
112
+ torch.manual_seed(seed)
113
+
114
+ init_image = download_image(imageUrl).resize((width, height))
115
+ init_image = self.__canny_detect_edge(init_image)
116
+
117
+ result = self.pipe2.__call__(
118
+ prompt=prompt,
119
+ image=init_image,
120
+ guidance_scale=guidance_scale,
121
+ num_images_per_prompt=1,
122
+ negative_prompt=negative_prompt,
123
+ num_inference_steps=steps,
124
+ height=height,
125
+ width=width,
126
+ )
127
+ return Result.from_result(result)
128
+
129
+ @torch.inference_mode()
130
+ def process_pose(
131
+ self,
132
+ prompt: List[str],
133
+ image: List[Image.Image],
134
+ seed: int,
135
+ steps: int,
136
+ guidance_scale: float,
137
+ negative_prompt: List[str],
138
+ height: int,
139
+ width: int,
140
+ ):
141
+ if self.__current_task_name != "pose":
142
+ raise Exception("ControlNet is not loaded with pose model")
143
+
144
+ torch.manual_seed(seed)
145
+
146
+ result = self.pipe2.__call__(
147
+ prompt=prompt,
148
+ image=image,
149
+ num_images_per_prompt=1,
150
+ num_inference_steps=steps,
151
+ negative_prompt=negative_prompt,
152
+ guidance_scale=guidance_scale,
153
+ height=height,
154
+ width=width,
155
+ )
156
+ return Result.from_result(result)
157
+
158
+ @torch.inference_mode()
159
+ def process_tile_upscaler(
160
+ self,
161
+ imageUrl: str,
162
+ prompt: str,
163
+ negative_prompt: str,
164
+ steps: int,
165
+ seed: int,
166
+ height: int,
167
+ width: int,
168
+ resize_dimension: int,
169
+ guidance_scale: float,
170
+ ):
171
+ if self.__current_task_name != "tile_upscaler":
172
+ raise Exception("ControlNet is not loaded with tile_upscaler model")
173
+
174
+ torch.manual_seed(seed)
175
+
176
+ init_image = download_image(imageUrl).resize((width, height))
177
+ condition_image = self.__resize_for_condition_image(
178
+ init_image, resize_dimension
179
+ )
180
+
181
+ result = self.pipe.__call__(
182
+ image=condition_image,
183
+ prompt=prompt,
184
+ controlnet_conditioning_image=condition_image,
185
+ num_inference_steps=steps,
186
+ negative_prompt=negative_prompt,
187
+ height=condition_image.size[1],
188
+ width=condition_image.size[0],
189
+ strength=1.0,
190
+ guidance_scale=guidance_scale,
191
+ )
192
+ return Result.from_result(result)
193
+
194
+ def detect_pose(self, imageUrl: str) -> Image.Image:
195
+ detector = OpenposeDetector.from_pretrained("lllyasviel/ControlNet")
196
+ image = download_image(imageUrl)
197
+ image = detector.__call__(image, hand_and_face=True)
198
+ return image
199
+
200
+ def __canny_detect_edge(self, image: Image.Image) -> Image.Image:
201
+ image_array = np.array(image)
202
+
203
+ low_threshold = 100
204
+ high_threshold = 200
205
+
206
+ image_array = cv2.Canny(image_array, low_threshold, high_threshold)
207
+ image_array = image_array[:, :, None]
208
+ image_array = np.concatenate([image_array, image_array, image_array], axis=2)
209
+ canny_image = Image.fromarray(image_array)
210
+ return canny_image
211
+
212
+ def __resize_for_condition_image(self, image: Image.Image, resolution: int):
213
+ input_image = image.convert("RGB")
214
+ W, H = input_image.size
215
+ k = float(resolution) / min(W, H)
216
+ H *= k
217
+ W *= k
218
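+ # snap both sides to multiples of 64 so the resized image stays compatible with the diffusion model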
+ H = int(round(H / 64.0)) * 64
219
+ W = int(round(W / 64.0)) * 64
220
+ img = input_image.resize((W, H), resample=Image.LANCZOS)
221
+ return img
internals/pipelines/img_classifier.py ADDED
@@ -0,0 +1,24 @@
1
+ from typing import List
2
+
3
+ from transformers import pipeline
4
+
5
+ from internals.util.commons import download_image
6
+
7
+
8
+ class ImageClassifier:
9
+ def __init__(self, candidates: List[str] = ["realistic", "anime", "comic"]):
10
+ self.__candidates = candidates
11
+
12
+ def load(self):
13
+ self.pipe = pipeline(
14
+ "zero-shot-image-classification",
15
+ model="philschmid/clip-zero-shot-image-classification",
16
+ )
17
+
18
+ def classify(self, image_url: str, width: int, height: int) -> str:
19
+ image = download_image(image_url).resize((width, height))
20
+ results = self.pipe.__call__([image], candidate_labels=self.__candidates)
21
+ results = results[0]
22
+ if len(results) > 0:
23
+ return results[0]["label"]
24
+ return ""
internals/pipelines/img_to_text.py ADDED
@@ -0,0 +1,31 @@
1
+ import re
2
+
3
+ import torch
4
+ from torchvision import transforms
5
+ from transformers import BlipForConditionalGeneration, BlipProcessor
6
+
7
+ from internals.util.commons import download_image
8
+
9
+
10
+ class Image2Text:
11
+ def load(self):
12
+ self.processor = BlipProcessor.from_pretrained(
13
+ "Salesforce/blip-image-captioning-large"
14
+ )
15
+ self.model = BlipForConditionalGeneration.from_pretrained(
16
+ "Salesforce/blip-image-captioning-large", torch_dtype=torch.float16
17
+ ).to("cuda")
18
+
19
+ def process(self, imageUrl: str) -> str:
20
+ image = download_image(imageUrl).resize((512, 512))
21
+ inputs = self.processor.__call__(image, return_tensors="pt").to(
22
+ "cuda", torch.float16
23
+ )
24
+ output_ids = self.model.generate(
25
+ **inputs, do_sample=False, top_p=0.9, max_length=128
26
+ )
27
+ output_text = self.processor.batch_decode(output_ids)
28
+ print(output_text)
29
+ output_text = output_text[0]
30
+ output_text = re.sub(r"</.>|\n|\[SEP\]", "", output_text)
31
+ return output_text
internals/pipelines/inpainter.py ADDED
@@ -0,0 +1,41 @@
1
+ from typing import List, Union
2
+
3
+ import torch
4
+ from diffusers import StableDiffusionInpaintPipeline
5
+
6
+ from internals.pipelines.commons import AbstractPipeline
7
+ from internals.util.commons import disable_safety_checker, download_image
8
+
9
+
10
+ class InPainter(AbstractPipeline):
11
+ def load(self):
12
+ self.pipe = StableDiffusionInpaintPipeline.from_pretrained(
13
+ "jayparmr/icbinp_v8_inpaint_v2",
14
+ torch_dtype=torch.float16,
15
+ ).to("cuda")
16
+ disable_safety_checker(self.pipe)
17
+
18
+ @torch.inference_mode()
19
+ def process(
20
+ self,
21
+ image_url: str,
22
+ mask_image_url: str,
23
+ width: int,
24
+ height: int,
25
+ seed: int,
26
+ prompt: Union[str, List[str]],
27
+ negative_prompt: Union[str, List[str]],
28
+ ):
29
+ torch.manual_seed(seed)
30
+
31
+ input_img = download_image(image_url).resize((width, height))
32
+ mask_img = download_image(mask_image_url).resize((width, height))
33
+
34
+ return self.pipe.__call__(
35
+ prompt=prompt,
36
+ image=input_img,
37
+ mask_image=mask_img,
38
+ height=height,
39
+ width=width,
40
+ negative_prompt=negative_prompt,
41
+ ).images
internals/pipelines/object_remove.py ADDED
@@ -0,0 +1,82 @@
1
+ import os
2
+ from pathlib import Path
3
+ from typing import List
4
+
5
+ import cv2
6
+ import numpy as np
7
+ import torch
8
+ import tqdm
9
+ from omegaconf import OmegaConf
10
+ from PIL import Image
11
+ from torch.utils.data._utils.collate import default_collate
12
+
13
+ from internals.util.commons import download_file, download_image
14
+ from internals.util.config import get_root_dir
15
+ from saicinpainting.evaluation.utils import move_to_device
16
+ from saicinpainting.training.data.datasets import make_default_val_dataset
17
+ from saicinpainting.training.trainers import load_checkpoint
18
+
19
+
20
+ class ObjectRemoval:
21
+ def load(self, model_dir):
22
+ print("Downloading LAMA model...")
23
+
24
+ self.lama_path = Path.home() / ".cache" / "lama"
25
+
26
+ out_file = self.lama_path / "models" / "best.ckpt"
27
+ os.makedirs(os.path.dirname(out_file), exist_ok=True)
28
+ download_file(
29
+ "https://huggingface.co/akhaliq/lama/resolve/main/best.ckpt", out_file
30
+ )
31
+ config = OmegaConf.load(get_root_dir() + "/config.yml")
32
+ config.training_model.predict_only = True
33
+ self.model = load_checkpoint(
34
+ config, str(out_file), strict=False, map_location="cuda"
35
+ )
36
+ self.model.freeze()
37
+ self.model.to("cuda")
38
+
39
+ @torch.no_grad()
40
+ def process(
41
+ self,
42
+ image_url: str,
43
+ mask_image_url: str,
44
+ seed: int,
45
+ width: int,
46
+ height: int,
47
+ ) -> List:
48
+ torch.manual_seed(seed)
49
+
50
+ img_folder = self.lama_path / "images"
51
+ indir = img_folder / "input"
52
+
53
+ img_folder.mkdir(parents=True, exist_ok=True)
54
+ indir.mkdir(parents=True, exist_ok=True)
55
+
56
+ download_image(image_url).resize((width, height)).save(indir / "data.png")
57
+ download_image(mask_image_url).resize((width, height)).save(
58
+ indir / "data_mask.png"
59
+ )
60
+
61
+ dataset = make_default_val_dataset(
62
+ img_folder / "input", img_suffix=".png", pad_out_to_modulo=8
63
+ )
64
+
65
+ out_images = []
66
+ for img_i in tqdm.trange(len(dataset)):
67
+ batch = move_to_device(default_collate([dataset[img_i]]), "cuda")
68
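+ # binarize the mask before running the batch through the LaMa model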
+ batch["mask"] = (batch["mask"] > 0) * 1
69
+ batch = self.model(batch)
70
+ out_path = str(img_folder / "out.png")
71
+
72
+ cur_res = batch["inpainted"][0].permute(1, 2, 0).detach().cpu().numpy()
73
+
74
+ cur_res = np.clip(cur_res * 255, 0, 255).astype("uint8")
75
+ cur_res = cv2.cvtColor(cur_res, cv2.COLOR_RGB2BGR)
76
+ cv2.imwrite(out_path, cur_res)
77
+
78
+ image = Image.open(out_path).convert("RGB")
79
+ out_images.append(image)
80
+ os.remove(out_path)
81
+
82
+ return out_images
internals/pipelines/prompt_modifier.py ADDED
@@ -0,0 +1,54 @@
1
+ from typing import List, Optional
2
+
3
+ from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig
4
+
5
+
6
+ class PromptModifier:
7
+ def __init__(self, num_of_sequences: Optional[int] = 4):
8
+ self.__blacklist = {"alphonse mucha": "", "adolphe bouguereau": ""}
9
+ self.__num_of_sequences = num_of_sequences
10
+
11
+ def load(self):
12
+ self.prompter_model = AutoModelForCausalLM.from_pretrained(
13
+ "Gustavosta/MagicPrompt-Stable-Diffusion"
14
+ )
15
+ self.prompter_tokenizer = AutoTokenizer.from_pretrained(
16
+ "Gustavosta/MagicPrompt-Stable-Diffusion"
17
+ )
18
+ self.prompter_tokenizer.pad_token = self.prompter_tokenizer.eos_token
19
+ self.prompter_tokenizer.padding_side = "left"
20
+
21
+ def modify(self, text: str) -> List[str]:
22
+ eos_id = self.prompter_tokenizer.eos_token_id
23
+ # restricted_words_list = ["octane", "cyber"]
24
+ # restricted_words_token_ids = prompter_tokenizer(
25
+ # restricted_words_list, add_special_tokens=False
26
+ # ).input_ids
27
+
28
+ generation_config = GenerationConfig(
29
+ do_sample=False,
30
+ max_new_tokens=75,
31
+ num_beams=4,
32
+ num_return_sequences=self.__num_of_sequences,
33
+ eos_token_id=eos_id,
34
+ pad_token_id=eos_id,
35
+ length_penalty=-1.0,
36
+ )
37
+
38
+ input_ids = self.prompter_tokenizer(text.strip(), return_tensors="pt").input_ids
39
+ outputs = self.prompter_model.generate(
40
+ input_ids, generation_config=generation_config
41
+ )
42
+ output_texts = self.prompter_tokenizer.batch_decode(
43
+ outputs, skip_special_tokens=True
44
+ )
45
+ output_texts = self.__patch_blacklist_words(output_texts)
46
+ return output_texts
47
+
48
+ def __patch_blacklist_words(self, texts: List[str]):
49
+ def replace_all(text, dic):
50
+ for i, j in dic.items():
51
+ text = text.replace(i, j)
52
+ return text
53
+
54
+ return [replace_all(text, self.__blacklist) for text in texts]
internals/pipelines/remove_background.py ADDED
@@ -0,0 +1,16 @@
1
+ import io
2
+ from typing import Union
3
+
4
+ from PIL import Image
5
+ from rembg import remove
6
+
7
+ from internals.util.commons import read_url
8
+
9
+
10
+ class RemoveBackground:
11
+ def remove(self, image: Union[str, Image.Image]) -> Image.Image:
12
+ if type(image) is str:
13
+ image = Image.open(io.BytesIO(read_url(image)))
14
+
15
+ output = remove(image)
16
+ return output
internals/pipelines/safety_checker.py ADDED
@@ -0,0 +1,163 @@
1
2
+
3
+ import cv2
4
+ import numpy as np
5
+ import torch
6
+ import torch.nn as nn
7
+ from transformers import CLIPConfig, CLIPVisionModel, PreTrainedModel
8
+
9
+ from internals.pipelines.commons import AbstractPipeline
10
+ from internals.util.config import get_nsfw_access, get_nsfw_threshold
11
+
12
+
13
+ def cosine_distance(image_embeds, text_embeds):
14
+ normalized_image_embeds = nn.functional.normalize(image_embeds)
15
+ normalized_text_embeds = nn.functional.normalize(text_embeds)
16
+ return torch.mm(normalized_image_embeds, normalized_text_embeds.t())
17
+
18
+
19
+ class SafetyChecker:
20
+ def load(self):
21
+ self.model = StableDiffusionSafetyCheckerV2.from_pretrained(
22
+ "CompVis/stable-diffusion-safety-checker", torch_dtype=torch.float16
23
+ ).to("cuda")
24
+
25
+ def apply(self, pipeline: AbstractPipeline):
26
+ if hasattr(pipeline, "pipe"):
27
+ pipeline.pipe.safety_checker = self.model
28
+ if hasattr(pipeline, "pipe2"):
29
+ pipeline.pipe2.safety_checker = self.model
30
+
31
+
32
+ class StableDiffusionSafetyCheckerV2(PreTrainedModel):
33
+ config_class = CLIPConfig
34
+
35
+ _no_split_modules = ["CLIPEncoderLayer"]
36
+
37
+ def __init__(self, config: CLIPConfig):
38
+ super().__init__(config)
39
+
40
+ self.vision_model = CLIPVisionModel(config.vision_config)
41
+ self.visual_projection = nn.Linear(
42
+ config.vision_config.hidden_size, config.projection_dim, bias=False
43
+ )
44
+
45
+ self.concept_embeds = nn.Parameter(
46
+ torch.ones(17, config.projection_dim), requires_grad=False
47
+ )
48
+ self.special_care_embeds = nn.Parameter(
49
+ torch.ones(3, config.projection_dim), requires_grad=False
50
+ )
51
+
52
+ self.concept_embeds_weights = nn.Parameter(torch.ones(17), requires_grad=False)
53
+ self.special_care_embeds_weights = nn.Parameter(
54
+ torch.ones(3), requires_grad=False
55
+ )
56
+
57
+ @torch.no_grad()
58
+ def forward(self, clip_input, images):
59
+ pooled_output = self.vision_model(clip_input)[1] # pooled_output
60
+ image_embeds = self.visual_projection(pooled_output)
61
+
62
+ # we always cast to float32 as this does not cause significant overhead and is compatible with bfloat16
63
+ special_cos_dist = (
64
+ cosine_distance(image_embeds, self.special_care_embeds)
65
+ .cpu()
66
+ .float()
67
+ .numpy()
68
+ )
69
+ cos_dist = (
70
+ cosine_distance(image_embeds, self.concept_embeds).cpu().float().numpy()
71
+ )
72
+
73
+ result = []
74
+ batch_size = image_embeds.shape[0]
75
+ for i in range(batch_size):
76
+ result_img = {
77
+ "special_scores": {},
78
+ "special_care": [],
79
+ "concept_scores": {},
80
+ "bad_concepts": [],
81
+ }
82
+
83
+ # increase this value to create a stronger `nsfw` filter
84
+ # at the cost of increasing the possibility of filtering benign images
85
+ adjustment = 0.0
86
+
87
+ for concept_idx in range(len(special_cos_dist[0])):
88
+ concept_cos = special_cos_dist[i][concept_idx]
89
+ concept_threshold = self.special_care_embeds_weights[concept_idx].item()
90
+ result_img["special_scores"][concept_idx] = round(
91
+ concept_cos - concept_threshold + adjustment, 3
92
+ )
93
+ if result_img["special_scores"][concept_idx] > 0:
94
+ result_img["special_care"].append(
95
+ {concept_idx, result_img["special_scores"][concept_idx]}
96
+ )
97
+ adjustment = 0.01
98
+
99
+ for concept_idx in range(len(cos_dist[0])):
100
+ concept_cos = cos_dist[i][concept_idx]
101
+ concept_threshold = self.concept_embeds_weights[concept_idx].item()
102
+ result_img["concept_scores"][concept_idx] = round(
103
+ concept_cos - concept_threshold + adjustment, 3
104
+ )
105
+ if result_img["concept_scores"][concept_idx] > get_nsfw_threshold():
106
+ result_img["bad_concepts"].append(concept_idx)
107
+
108
+ result.append(result_img)
109
+
110
+ has_nsfw_concepts = [len(res["bad_concepts"]) > 0 for res in result]
111
+
112
+ # Blur images based on NSFW score
113
+ # -------------------------------
114
+ for idx, has_nsfw_concept in enumerate(has_nsfw_concepts):
115
+ if any(has_nsfw_concepts) and not get_nsfw_access():
116
+ if torch.is_tensor(images) or torch.is_tensor(images[0]):
117
+ image = images[idx].cpu().numpy().astype(np.float32)
118
+ image = cv2.blur(image, (30, 30))
119
+ image = torch.from_numpy(image)
120
+ images[idx] = image
121
+ else:
122
+ images[idx] = cv2.blur(images[idx], (30, 30))
123
+
124
+ if any(has_nsfw_concepts):
125
+ print("NSFW")
126
+
127
+ return images, has_nsfw_concepts
128
+
129
+ @torch.no_grad()
130
+ def forward_onnx(self, clip_input: torch.FloatTensor, images: torch.FloatTensor):
131
+ pooled_output = self.vision_model(clip_input)[1] # pooled_output
132
+ image_embeds = self.visual_projection(pooled_output)
133
+
134
+ special_cos_dist = cosine_distance(image_embeds, self.special_care_embeds)
135
+ cos_dist = cosine_distance(image_embeds, self.concept_embeds)
136
+
137
+ # increase this value to create a stronger `nsfw` filter
138
+ # at the cost of increasing the possibility of filtering benign images
139
+ adjustment = 0.0
140
+
141
+ special_scores = (
142
+ special_cos_dist - self.special_care_embeds_weights + adjustment
143
+ )
144
+ # special_scores = special_scores.round(decimals=3)
145
+ special_care = torch.any(special_scores > 0, dim=1)
146
+ special_adjustment = special_care * 0.01
147
+ special_adjustment = special_adjustment.unsqueeze(1).expand(
148
+ -1, cos_dist.shape[1]
149
+ )
150
+
151
+ concept_scores = (cos_dist - self.concept_embeds_weights) + special_adjustment
152
+ # concept_scores = concept_scores.round(decimals=3)
153
+ has_nsfw_concepts = torch.any(concept_scores > get_nsfw_threshold(), dim=1)
154
+
155
+ # Blur images based on NSFW score
156
+ # -------------------------------
157
+ if not get_nsfw_access():
158
+ image = images[has_nsfw_concepts].cpu().numpy().astype(np.float32)
159
+ image = cv2.blur(image, (30, 30))
160
+ image = torch.from_numpy(image)
161
+ images[has_nsfw_concepts] = image
162
+
163
+ return images, has_nsfw_concepts
internals/pipelines/twoStepPipeline.py ADDED
@@ -0,0 +1,252 @@
1
+ import torch
2
+ from diffusers import StableDiffusionPipeline
3
+
4
+ torch.backends.cudnn.benchmark = True
5
+ torch.backends.cuda.matmul.allow_tf32 = True
6
+
7
+ from typing import Any, Callable, Dict, List, Optional, Union
8
+
9
+ from diffusers import StableDiffusionPipeline
10
+ from diffusers.pipelines.stable_diffusion import StableDiffusionPipelineOutput
11
+
12
+
13
+ class two_step_pipeline(StableDiffusionPipeline):
14
+ @torch.no_grad()
15
+ def two_step_pipeline(
16
+ self,
17
+ prompt: Union[str, List[str]] = None,
18
+ modified_prompts: Union[str, List[str]] = None,
19
+ height: Optional[int] = None,
20
+ width: Optional[int] = None,
21
+ num_inference_steps: int = 50,
22
+ guidance_scale: float = 7.5,
23
+ negative_prompt: Optional[Union[str, List[str]]] = None,
24
+ num_images_per_prompt: Optional[int] = 1,
25
+ eta: float = 0.0,
26
+ generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None,
27
+ latents: Optional[torch.FloatTensor] = None,
28
+ prompt_embeds: Optional[torch.FloatTensor] = None,
29
+ negative_prompt_embeds: Optional[torch.FloatTensor] = None,
30
+ output_type: Optional[str] = "pil",
31
+ return_dict: bool = True,
32
+ callback: Optional[Callable[[int, int, torch.FloatTensor], None]] = None,
33
+ callback_steps: int = 1,
34
+ cross_attention_kwargs: Optional[Dict[str, Any]] = None,
35
+ iteration: float = 3.0,
36
+ ):
37
+ r"""
38
+ Function invoked when calling the pipeline for generation.
39
+ Args:
40
+ prompt (`str` or `List[str]`, *optional*):
41
+ The prompt or prompts to guide the image generation. If not defined, one has to pass `prompt_embeds`.
42
+ instead.
43
+ height (`int`, *optional*, defaults to self.unet.config.sample_size * self.vae_scale_factor):
44
+ The height in pixels of the generated image.
45
+ width (`int`, *optional*, defaults to self.unet.config.sample_size * self.vae_scale_factor):
46
+ The width in pixels of the generated image.
47
+ num_inference_steps (`int`, *optional*, defaults to 50):
48
+ The number of denoising steps. More denoising steps usually lead to a higher quality image at the
49
+ expense of slower inference.
50
+ guidance_scale (`float`, *optional*, defaults to 7.5):
51
+ Guidance scale as defined in [Classifier-Free Diffusion Guidance](https://arxiv.org/abs/2207.12598).
52
+ `guidance_scale` is defined as `w` of equation 2. of [Imagen
53
+ Paper](https://arxiv.org/pdf/2205.11487.pdf). Guidance scale is enabled by setting `guidance_scale >
54
+ 1`. Higher guidance scale encourages to generate images that are closely linked to the text `prompt`,
55
+ usually at the expense of lower image quality.
56
+ negative_prompt (`str` or `List[str]`, *optional*):
57
+ The prompt or prompts not to guide the image generation. If not defined, one has to pass
58
+ `negative_prompt_embeds` instead. Ignored when not using guidance (i.e., ignored if `guidance_scale` is
59
+ less than `1`).
60
+ num_images_per_prompt (`int`, *optional*, defaults to 1):
61
+ The number of images to generate per prompt.
62
+ eta (`float`, *optional*, defaults to 0.0):
63
+ Corresponds to parameter eta (η) in the DDIM paper: https://arxiv.org/abs/2010.02502. Only applies to
64
+ [`schedulers.DDIMScheduler`], will be ignored for others.
65
+ generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
66
+ One or a list of [torch generator(s)](https://pytorch.org/docs/stable/generated/torch.Generator.html)
67
+ to make generation deterministic.
68
+ latents (`torch.FloatTensor`, *optional*):
69
+ Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for image
70
+ generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
71
+ tensor will ge generated by sampling using the supplied random `generator`.
72
+ prompt_embeds (`torch.FloatTensor`, *optional*):
73
+ Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
74
+ provided, text embeddings will be generated from `prompt` input argument.
75
+ negative_prompt_embeds (`torch.FloatTensor`, *optional*):
76
+ Pre-generated negative text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
77
+ weighting. If not provided, negative_prompt_embeds will be generated from `negative_prompt` input
78
+ argument.
79
+ output_type (`str`, *optional*, defaults to `"pil"`):
80
+ The output format of the generate image. Choose between
81
+ [PIL](https://pillow.readthedocs.io/en/stable/): `PIL.Image.Image` or `np.array`.
82
+ return_dict (`bool`, *optional*, defaults to `True`):
83
+ Whether or not to return a [`~pipelines.stable_diffusion.StableDiffusionPipelineOutput`] instead of a
84
+ plain tuple.
85
+ callback (`Callable`, *optional*):
86
+ A function that will be called every `callback_steps` steps during inference. The function will be
87
+ called with the following arguments: `callback(step: int, timestep: int, latents: torch.FloatTensor)`.
88
+ callback_steps (`int`, *optional*, defaults to 1):
89
+ The frequency at which the `callback` function will be called. If not specified, the callback will be
90
+ called at every step.
91
+ cross_attention_kwargs (`dict`, *optional*):
92
+ A kwargs dictionary that if specified is passed along to the `AttentionProcessor` as defined under
93
+ `self.processor` in
94
+ [diffusers.cross_attention](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/cross_attention.py).
95
+ Examples:
96
+ Returns:
97
+ [`~pipelines.stable_diffusion.StableDiffusionPipelineOutput`] or `tuple`:
98
+ [`~pipelines.stable_diffusion.StableDiffusionPipelineOutput`] if `return_dict` is True, otherwise a `tuple.
99
+ When returning a tuple, the first element is a list with the generated images, and the second element is a
100
+ list of `bool`s denoting whether the corresponding generated image likely represents "not-safe-for-work"
101
+ (nsfw) content, according to the `safety_checker`.
102
+ """
103
+ # 0. Default height and width to unet
104
+ height = height or self.unet.config.sample_size * self.vae_scale_factor
105
+ width = width or self.unet.config.sample_size * self.vae_scale_factor
106
+
107
+ # 1. Check inputs. Raise error if not correct
108
+ self.check_inputs(
109
+ prompt,
110
+ height,
111
+ width,
112
+ callback_steps,
113
+ negative_prompt,
114
+ prompt_embeds,
115
+ negative_prompt_embeds,
116
+ )
117
+
118
+ # 2. Define call parameters
119
+ if prompt is not None and isinstance(prompt, str):
120
+ batch_size = 1
121
+ elif prompt is not None and isinstance(prompt, list):
122
+ batch_size = len(prompt)
123
+ else:
124
+ batch_size = prompt_embeds.shape[0]
125
+
126
+ device = self._execution_device
127
+ # here `guidance_scale` is defined analog to the guidance weight `w` of equation (2)
128
+ # of the Imagen paper: https://arxiv.org/pdf/2205.11487.pdf . `guidance_scale = 1`
129
+ # corresponds to doing no classifier free guidance.
130
+ do_classifier_free_guidance = guidance_scale > 1.0
131
+
132
+ # 3. Encode input prompt
133
+ modified_embeds = self._encode_prompt(
134
+ modified_prompts,
135
+ device,
136
+ num_images_per_prompt,
137
+ do_classifier_free_guidance,
138
+ negative_prompt,
139
+ prompt_embeds=prompt_embeds,
140
+ negative_prompt_embeds=negative_prompt_embeds,
141
+ )
142
+ print("mod prompt size : ", modified_embeds.size(), modified_embeds.dtype)
143
+
144
+ prompt_embeds = self._encode_prompt(
145
+ prompt,
146
+ device,
147
+ num_images_per_prompt,
148
+ do_classifier_free_guidance,
149
+ negative_prompt,
150
+ prompt_embeds=prompt_embeds,
151
+ negative_prompt_embeds=negative_prompt_embeds,
152
+ )
153
+
154
+ print("prompt size : ", prompt_embeds.size(), prompt_embeds.dtype)
155
+
156
+ # 4. Prepare timesteps
157
+ self.scheduler.set_timesteps(num_inference_steps, device=device)
158
+ timesteps = self.scheduler.timesteps
159
+
160
+ # 5. Prepare latent variables
161
+ num_channels_latents = self.unet.config.in_channels
162
+ latents = self.prepare_latents(
163
+ batch_size * num_images_per_prompt,
164
+ num_channels_latents,
165
+ height,
166
+ width,
167
+ prompt_embeds.dtype,
168
+ device,
169
+ generator,
170
+ latents,
171
+ )
172
+
173
+ # 6. Prepare extra step kwargs. TODO: Logic should ideally just be moved out of the pipeline
174
+ extra_step_kwargs = self.prepare_extra_step_kwargs(generator, eta)
175
+
176
+ # 7. Denoising loop
177
+ num_warmup_steps = len(timesteps) - num_inference_steps * self.scheduler.order
178
+ with self.progress_bar(total=num_inference_steps) as progress_bar:
179
+ for i, t in enumerate(timesteps):
180
+ # expand the latents if we are doing classifier free guidance
181
+ latent_model_input = (
182
+ torch.cat([latents] * 2) if do_classifier_free_guidance else latents
183
+ )
184
+ latent_model_input = self.scheduler.scale_model_input(
185
+ latent_model_input, t
186
+ )
187
+
188
+ # predict the noise residual
189
+ noise_pred = self.unet(
190
+ latent_model_input,
191
+ t,
192
+ encoder_hidden_states=prompt_embeds,
193
+ cross_attention_kwargs=cross_attention_kwargs,
194
+ ).sample
195
+
196
+ # perform guidance
197
+ if do_classifier_free_guidance:
198
+ noise_pred_uncond, noise_pred_text = noise_pred.chunk(2)
199
+ noise_pred = noise_pred_uncond + guidance_scale * (
200
+ noise_pred_text - noise_pred_uncond
201
+ )
202
+
203
+ # compute the previous noisy sample x_t -> x_t-1
204
+ latents = self.scheduler.step(
205
+ noise_pred, t, latents, **extra_step_kwargs
206
+ ).prev_sample
207
+
208
+ # call the callback, if provided
209
+ if i == len(timesteps) - 1 or (
210
+ (i + 1) > num_warmup_steps and (i + 1) % self.scheduler.order == 0
211
+ ):
212
+ progress_bar.update()
213
+ if callback is not None and i % callback_steps == 0:
214
+ callback(i, t, latents)
215
+
216
+ if i == int(len(timesteps) / iteration):
217
+ print("modified prompts")
218
+ prompt_embeds = modified_embeds
219
+
220
+ if output_type == "latent":
221
+ image = latents
222
+ has_nsfw_concept = None
223
+ elif output_type == "pil":
224
+ # 8. Post-processing
225
+ image = self.decode_latents(latents)
226
+
227
+ # 9. Run safety checker
228
+ image, has_nsfw_concept = self.run_safety_checker(
229
+ image, device, prompt_embeds.dtype
230
+ )
231
+
232
+ # 10. Convert to PIL
233
+ image = self.numpy_to_pil(image)
234
+ else:
235
+ # 8. Post-processing
236
+ image = self.decode_latents(latents)
237
+
238
+ # 9. Run safety checker
239
+ image, has_nsfw_concept = self.run_safety_checker(
240
+ image, device, prompt_embeds.dtype
241
+ )
242
+
243
+ # Offload last model to CPU
244
+ if hasattr(self, "final_offload_hook") and self.final_offload_hook is not None:
245
+ self.final_offload_hook.offload()
246
+
247
+ if not return_dict:
248
+ return (image, has_nsfw_concept)
249
+
250
+ return StableDiffusionPipelineOutput(
251
+ images=image, nsfw_content_detected=has_nsfw_concept
252
+ )
internals/pipelines/upscaler.py ADDED
@@ -0,0 +1,91 @@
1
+ import math
2
+ import os
3
+ from pathlib import Path
4
+ from typing import Union
5
+
6
+ import cv2
7
+ import numpy as np
8
+ from basicsr.archs.rrdbnet_arch import RRDBNet
9
+ from basicsr.utils.download_util import load_file_from_url
10
+ from PIL import Image
11
+ from realesrgan import RealESRGANer
12
+
13
+ import internals.util.image as ImageUtil
14
+ from internals.util.commons import download_image
15
+
16
+
17
+ class Upscaler:
18
+ __model_esrgan_url = "https://github.com/xinntao/Real-ESRGAN/releases/download/v0.1.0/RealESRGAN_x4plus.pth"
19
+ __model_esrgan_anime_url = "https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.2.4/RealESRGAN_x4plus_anime_6B.pth"
20
+
21
+ def load(self):
22
+ download_dir = Path(Path.home() / ".cache" / "realesrgan")
23
+ download_dir.mkdir(parents=True, exist_ok=True)
24
+
25
+ self.__model_path = self.__preload_model(self.__model_esrgan_url, download_dir)
26
+ self.__model_path_anime = self.__preload_model(
27
+ self.__model_esrgan_anime_url, download_dir
28
+ )
29
+
30
+ def upscale(self, image: Union[str, Image.Image], resize_dimension: int) -> bytes:
31
+ model = RRDBNet(
32
+ num_in_ch=3,
33
+ num_out_ch=3,
34
+ num_feat=64,
35
+ num_block=23,
36
+ num_grow_ch=32,
37
+ scale=4,
38
+ )
39
+ return self.__internal_upscale(
40
+ image, resize_dimension, self.__model_path, model
41
+ )
42
+
43
+ def upscale_anime(
44
+ self, image: Union[str, Image.Image], resize_dimension: int
45
+ ) -> bytes:
46
+ model = RRDBNet(
47
+ num_in_ch=3,
48
+ num_out_ch=3,
49
+ num_feat=64,
50
+ num_block=23,
51
+ num_grow_ch=32,
52
+ scale=4,
53
+ )
54
+ return self.__internal_upscale(
55
+ image, resize_dimension, self.__model_path_anime, model
56
+ )
57
+
58
+ def __preload_model(self, url: str, download_dir: Path):
59
+ name = url.split("/")[-1]
60
+ if not os.path.exists(str(download_dir / name)):
61
+ return load_file_from_url(
62
+ url=url,
63
+ model_dir=str(download_dir),
64
+ progress=True,
65
+ file_name=None,
66
+ )
67
+ else:
68
+ return str(download_dir / name)
69
+
70
+ def __internal_upscale(
71
+ self,
72
+ image,
73
+ resize_dimension: int,
74
+ model_path: str,
75
+ rrbdnet: RRDBNet,
76
+ ) -> bytes:
77
+ if type(image) is str:
78
+ image = download_image(image)
79
+ image = ImageUtil.resize_image_to512(image)
80
+ image = ImageUtil.to_bytes(image)
81
+
82
+ upsampler = RealESRGANer(
83
+ scale=4, model_path=model_path, model=rrbdnet, half="fp16", gpu_id="0"
84
+ )
85
+ image_array = np.frombuffer(image, dtype=np.uint8)
86
+ input_image = cv2.imdecode(image_array, cv2.IMREAD_COLOR)
87
+ dimension = min(input_image.shape[0], input_image.shape[1])
88
+ scale = max(math.floor(resize_dimension / dimension), 2)
89
+ output, _ = upsampler.enhance(input_image, outscale=scale)
90
+ out_bytes = cv2.imencode(".png", output)[1].tobytes()
91
+ return out_bytes
internals/util/__init__.py ADDED
File without changes
internals/util/args.py ADDED
@@ -0,0 +1,13 @@
1
+ import re
2
+ from typing import Dict
3
+
4
+
5
+ def apply_style_args(data: Dict):
6
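+ # Extract a leading "[style:<name>]" tag from the prompt into data["style"] and strip it from the prompt text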
+ prompt = data.get("prompt", None)
7
+ if prompt is None:
8
+ return
9
+ result = re.match(r"\[style:(.*?)\]", prompt)
10
+ if result is not None:
11
+ style = result.group(1)
12
+ data["style"] = style
13
+ data["prompt"] = prompt.replace(f"[style:{style}]", "").strip()
internals/util/avatar.py ADDED
@@ -0,0 +1,59 @@
1
+ import json
2
+ import os
3
+ import re
4
+
5
+ from internals.data.dataAccessor import getCharacters
6
+ from internals.util.config import get_root_dir
7
+
8
+
9
+ class Avatar:
10
+ __avatars = {}
11
+
12
+ def load_local(self):
13
+ self.__find_available_characters(get_root_dir())
14
+ if len(self.__avatars.items()) > 0:
15
+ print("Local characters", self.__avatars)
16
+
17
+ def fetch_from_network(self, model_id: int):
18
+ characters = getCharacters(str(model_id))
19
+ if characters is not None:
20
+ for character in characters:
21
+ item = {
22
+ "avatarName": str(character["title"]).lower(),
23
+ "codename": character["tag"],
24
+ "extraPrompt": character["extraData"]["extraPrompt"],
25
+ }
26
+ self.__avatars[item["avatarName"]] = item
27
+
28
+ def add_code_names(self, prompt):
29
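+ # Replace whole-word mentions of character names in the prompt with their configured extra prompts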
+ array_of_objects = self.__avatars.values()
30
+
31
+ for obj in array_of_objects:
32
+ prompt = (
33
+ re.sub(
34
+ r"\b" + obj["avatarName"] + r"\b",
35
+ obj["extraPrompt"],
36
+ prompt,
37
+ flags=re.IGNORECASE,
38
+ )
39
+ + " "
40
+ )
41
+ print(prompt)
42
+ return prompt
43
+
44
+ def __find_available_characters(self, path: str):
45
+ if os.path.exists(path + "/characters.json"):
46
+ print(path)
47
+ try:
48
+ print("Loading characters")
49
+ with open(path + "/characters.json") as f:
50
+ data = json.load(f)
51
+ print("Characters: ", data)
52
+ if "avatarName" in data[0]:
53
+ for item in data:
54
+ self.__avatars[item["avatarName"]] = item
55
+ print("Avatars", self.__avatars)
56
+ else:
57
+ print("Invalid characters.json file")
58
+ except Exception as e:
59
+ print("Error Loading characters", e)
internals/util/cache.py ADDED
@@ -0,0 +1,31 @@
1
+ import gc
2
+
3
+ import torch
4
+
5
+
6
+ def clear_cuda_and_gc():
7
+ clear_cuda()
8
+ clear_gc()
9
+
10
+
11
+ def clear_cuda():
12
+ torch.cuda.empty_cache()
13
+
14
+
15
+ def clear_gc():
16
+ gc.collect()
17
+
18
+
19
+ def auto_clear_cuda_and_gc(controlnet):
20
+ def auto_clear_cuda_and_gc_wrapper(func):
21
+ def wrapper(*args, **kwargs):
22
+ try:
23
+ return func(*args, **kwargs)
24
+ except Exception as e:
25
+ controlnet.cleanup()
26
+ clear_cuda_and_gc()
27
+ raise e
28
+
29
+ return wrapper
30
+
31
+ return auto_clear_cuda_and_gc_wrapper
internals/util/commons.py ADDED
@@ -0,0 +1,203 @@
1
+ import json
2
+ import os
3
+ import pprint
4
+ import random
5
+ import re
6
+ from io import BytesIO
7
+ from pathlib import Path
8
+ from typing import Union
9
+
10
+ import boto3
11
+ import requests
12
+
13
+ from internals.util.config import api_endpoint, api_headers
14
+
15
+ s3 = boto3.client("s3")
16
+ import io
17
+ import urllib.request
18
+
19
+ from PIL import Image
20
+
21
+ black_list = {"alphonse mucha": "", "adolphe bouguereau": ""}
22
+ pp = pprint.PrettyPrinter(indent=4)
23
+
24
+ webhook_url = (
25
+ "https://hooks.slack.com/services/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"
26
+ )
27
+ error_webhook = (
28
+ "https://hooks.slack.com/services/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"
29
+ )
30
+
31
+ characterSheets = [
32
+ "character+sheets/1.1.png",
33
+ "character+sheets/10.1.png",
34
+ "character+sheets/11.1.png",
35
+ "character+sheets/12.1.png",
36
+ "character+sheets/13.1.png",
37
+ "character+sheets/14.1.png",
38
+ "character+sheets/16.1.png",
39
+ "character+sheets/17.1.png",
40
+ "character+sheets/18.1.png",
41
+ "character+sheets/19.1.png",
42
+ "character+sheets/2.1.png",
43
+ "character+sheets/20.1.png",
44
+ "character+sheets/21.1.png",
45
+ "character+sheets/22.1.png",
46
+ "character+sheets/23.1.png",
47
+ "character+sheets/24.1.png",
48
+ "character+sheets/25.1.png",
49
+ "character+sheets/26.1.png",
50
+ "character+sheets/27.1.png",
51
+ "character+sheets/28.1.png",
52
+ "character+sheets/29.1.png",
53
+ "character+sheets/3.1.png",
54
+ "character+sheets/30.1.png",
55
+ "character+sheets/31.1.png",
56
+ "character+sheets/32.1.png",
57
+ "character+sheets/33.1.png",
58
+ "character+sheets/34.1.png",
59
+ "character+sheets/35.1.png",
60
+ "character+sheets/36.1.png",
61
+ "character+sheets/38.1.png",
62
+ "character+sheets/39.1.png",
63
+ "character+sheets/4.1.png",
64
+ "character+sheets/40.1.png",
65
+ "character+sheets/42.1.png",
66
+ "character+sheets/43.1.png",
67
+ "character+sheets/44.1.png",
68
+ "character+sheets/45.1.png",
69
+ "character+sheets/46.1.png",
70
+ "character+sheets/47.1.png",
71
+ "character+sheets/48.1.png",
72
+ "character+sheets/49.1.png",
73
+ "character+sheets/5.1.png",
74
+ "character+sheets/50.1.png",
75
+ "character+sheets/51.1.png",
76
+ "character+sheets/52.1.png",
77
+ "character+sheets/53.1.png",
78
+ "character+sheets/54.1.png",
79
+ "character+sheets/55.1.png",
80
+ "character+sheets/56.1.png",
81
+ "character+sheets/57.1.png",
82
+ "character+sheets/58.1.png",
83
+ "character+sheets/59.1.png",
84
+ "character+sheets/60.1.png",
85
+ "character+sheets/61.1.png",
86
+ "character+sheets/62.1.png",
87
+ "character+sheets/63.1.png",
88
+ "character+sheets/64.1.png",
89
+ "character+sheets/65.1.png",
90
+ "character+sheets/66.1.png",
91
+ "character+sheets/7.1.png",
92
+ "character+sheets/8.1.png",
93
+ "character+sheets/9.1.png",
94
+ ]
95
+
96
+
97
+ def upload_images(images, processName: str, taskId: str):
98
+ imageUrls = []
99
+ for i, image in enumerate(images):
100
+ img_io = BytesIO()
101
+ image.save(img_io, "JPEG", quality=100)
102
+ img_io.seek(0)
103
+ key = "crecoAI/{}{}_{}.png".format(taskId, processName, i)
104
+ requests.post(
105
+ api_endpoint()
106
+ + "/comic-content/v1.0/upload/crecoai-assets-2?fileName="
107
+ + "{}{}_{}.png".format(taskId, processName, i),
108
+ headers=api_headers(),
109
+ files={"file": ("image.png", img_io, "image/png")},
110
+ )
111
+ # t = s3.put_object(
112
+ # Bucket="comic-assets", Key=key, Body=img_io.getvalue(), ACL="public-read"
113
+ # )
114
+ # print("uploading done to s3", key, t)
115
+ imageUrls.append(
116
+ "https://comic-assets.s3.ap-south-1.amazonaws.com/crecoAI/{}{}_{}.png".format(
117
+ taskId, processName, i
118
+ )
119
+ )
120
+
121
+ print({"promptImages": imageUrls})
122
+
123
+ return imageUrls
124
+
125
+
126
+ def upload_image(image: Union[Image.Image, BytesIO], out_path):
127
+ if type(image) is Image.Image:
128
+ buffer = io.BytesIO()
129
+ image.save(buffer, format="PNG")
130
+ image = buffer
131
+
132
+ image.seek(0)
133
+ requests.post(
134
+ api_endpoint()
135
+ + "/comic-content/v1.0/upload/crecoai-assets-2?fileName="
136
+ + str(out_path).replace("crecoAI/", ""),
137
+ headers=api_headers(),
138
+ files={"file": ("image.png", image, "image/png")},
139
+ )
140
+ # s3.upload_fileobj(image, "comic-assets", out_path, ExtraArgs={"ACL": "public-read"})
141
+ image.close()
142
+
143
+ image_url = "https://comic-assets.s3.ap-south-1.amazonaws.com/" + out_path
144
+ print({"promptImages": image_url})
145
+
146
+ return image_url
147
+
148
+
149
+ def download_image(url) -> Image.Image:
150
+ response = requests.get(url)
151
+ return Image.open(BytesIO(response.content)).convert("RGB")
152
+
153
+
154
+ def download_file(url, out_path: Path):
155
+ with requests.get(url, stream=True) as r:
156
+ r.raise_for_status()
157
+ with open(out_path, "wb") as f:
158
+ for chunk in r.iter_content(chunk_size=8192):
159
+ f.write(chunk)
160
+
161
+
162
+ def pickPoses():
163
+ random_images = random.sample(characterSheets, 4)
164
+ poses = []
165
+ prefix = "https://comic-assets.s3.ap-south-1.amazonaws.com/"
166
+
167
+ # Use list comprehension to add prefix to all elements in the array
168
+ random_images_with_prefix = [prefix + img for img in random_images]
169
+
170
+ print(random_images_with_prefix)
171
+ for imageUrl in random_images_with_prefix:
172
+ # Download and resize the image
173
+ init_image = download_image(imageUrl).resize((512, 512))
174
+
175
+ # Open the pose image
176
+ imageUrlPose = imageUrl
177
+ # print(imageUrl)
178
+ input_image_bytes = read_url(imageUrlPose)
179
+ # print(input_image_bytes)
180
+ pose_image = Image.open(io.BytesIO(input_image_bytes)).convert("RGB")
181
+ # print(pose_image)
182
+ pose_image = pose_image.resize((512, 512))
183
+ # print(pose_image)
184
+ # Append the result to the poses array
185
+ poses.append(pose_image)
186
+
187
+ return poses
188
+
189
+
190
+ def construct_default_s3_url(key):
191
+ return "https://comic-assets.s3.ap-south-1.amazonaws.com/" + key
192
+
193
+
194
+ def read_url(url: str):
195
+ with urllib.request.urlopen(url) as u:
196
+ return u.read()
197
+
198
+
199
+ def disable_safety_checker(pipe):
200
+ def dummy(images, **kwargs):
201
+ return images, False
202
+
203
+ pipe.safety_checker = None
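
As a quick orientation for the helpers above, here is a minimal, hypothetical round trip through `download_image` and `upload_images`; the URL, task id, and process name are placeholders, not values used by the service.

```
from internals.util.commons import download_image, upload_images

def example_round_trip():
    # Placeholder asset URL; any reachable image works for the sketch.
    source = download_image("https://comic-assets.s3.ap-south-1.amazonaws.com/crecoAI/example.png")
    variants = [source.resize((512, 512))]
    # Returns the public asset URLs that the rest of the pipeline reports back.
    return upload_images(variants, "_character_sheet", taskId="task-123")
```
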
internals/util/config.py ADDED
@@ -0,0 +1,66 @@
1
+ import os
2
+
3
+ from internals.data.task import Task
4
+
5
+ env = "gamma"
6
+ nsfw_threshold = 0.0
7
+ nsfw_access = False
8
+ access_token = ""
9
+ root_dir = ""
10
+
11
+
12
+ def set_root_dir(main_file: str):
13
+ global root_dir
14
+ root_dir = os.path.dirname(os.path.abspath(main_file))
15
+
16
+
17
+ def set_configs_from_task(task: Task):
18
+ global env, nsfw_threshold, nsfw_access, access_token
19
+ name = task.get_queue_name()
20
+ if name.startswith("prod"):
21
+ env = "prod"
22
+ else:
23
+ env = "gamma"
24
+ nsfw_threshold = task.get_nsfw_threshold()
25
+ nsfw_access = task.can_access_nsfw()
26
+ access_token = task.get_access_token()
27
+
28
+
29
+ def get_root_dir():
30
+ global root_dir
31
+ return root_dir
32
+
33
+
34
+ def get_environment():
35
+ global env
36
+ return env
37
+
38
+
39
+ def get_nsfw_threshold():
40
+ global nsfw_threshold
41
+ return nsfw_threshold
42
+
43
+
44
+ def get_nsfw_access():
45
+ global nsfw_access
46
+ return nsfw_access
47
+
48
+
49
+ def api_headers():
50
+ return {
51
+ "Access-Token": access_token,
52
+ }
53
+
54
+
55
+ def api_endpoint():
56
+ if env == "prod":
57
+ return "https://prod.pratilipicomics.com"
58
+ else:
59
+ return "https://gamma.pratilipicomics.com"
60
+
61
+
62
+ def comic_url():
63
+ if env == "prod":
64
+ return "http://internal-k8s-prod-internal-bb9c57a6bb-1524739074.ap-south-1.elb.amazonaws.com:80"
65
+ else:
66
+ return "http://internal-k8s-gamma-internal-ea8e32da94-1997933257.ap-south-1.elb.amazonaws.com:80"
internals/util/failure_hander.py ADDED
@@ -0,0 +1,40 @@
1
+ import json
2
+ import os
3
+ from pathlib import Path
4
+
5
+ from internals.data.dataAccessor import updateSource
6
+ from internals.data.task import Task
7
+ from internals.util.config import set_configs_from_task
8
+ from internals.util.slack import Slack
9
+
10
+
11
+ class FailureHandler:
12
+ __task_path = Path.home() / ".cache" / "inference" / "task.json"
13
+
14
+ @staticmethod
15
+ def register():
16
+ path = FailureHandler.__task_path
17
+ path.parent.mkdir(parents=True, exist_ok=True)
18
+ if path.exists():
19
+ task = Task(json.loads(path.read_text()))
20
+ set_configs_from_task(task)
21
+ # Slack().error_alert(task, Exception("CATASTROPHIC FAILURE"))
22
+ updateSource(task.get_sourceId(), task.get_userId(), "FAILED")
23
+ os.remove(path)
24
+
25
+ @staticmethod
26
+ def clear(func):
27
+ def wrapper(*args, **kwargs):
28
+ result = func(*args, **kwargs)
29
+ if result is not None:
30
+ path = FailureHandler.__task_path
31
+ if path.exists():
32
+ os.remove(path)
33
+ return result
34
+
35
+ return wrapper
36
+
37
+ @staticmethod
38
+ def handle(task: Task):
39
+ path = FailureHandler.__task_path
40
+ path.write_text(json.dumps(task.get_raw()))
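
The intended flow, sketched with a hypothetical `process(task)` worker: checkpoint the task before the risky work, clear the checkpoint on success, and let `register()` report anything left behind by a crash on the next startup.

```
from internals.util.failure_hander import FailureHandler

FailureHandler.register()           # on boot: mark a leftover task.json as FAILED

@FailureHandler.clear               # deletes task.json once a non-None result is returned
def process(task):
    FailureHandler.handle(task)     # persist the task before inference starts
    ...                             # run the pipelines
    return {"status": "ok"}
```
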
internals/util/image.py ADDED
@@ -0,0 +1,18 @@
1
+ import io
2
+
3
+ from PIL import Image
4
+
5
+
6
+ def to_bytes(image: Image.Image) -> bytes:
7
+ with io.BytesIO() as output:
8
+ image.save(output, format="JPEG")
9
+ return output.getvalue()
10
+
11
+
12
+ def resize_image_to512(image: Image.Image) -> Image.Image:
13
+ iw, ih = image.size
14
+ if iw > ih:
15
+ image = image.resize((512, int(512 * ih / iw)))
16
+ else:
17
+ image = image.resize((int(512 * iw / ih), 512))
18
+ return image
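
For reference, `resize_image_to512` scales the longer side down to 512 px and keeps the aspect ratio:

```
from PIL import Image
from internals.util.image import resize_image_to512

img = Image.new("RGB", (1024, 768))
print(resize_image_to512(img).size)   # (512, 384)
```
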
internals/util/lora_style.py ADDED
@@ -0,0 +1,154 @@
1
+ import json
2
+ import os
3
+ from pathlib import Path
4
+ from typing import Any, Dict, List, Union
5
+
6
+ import boto3
7
+ import torch
8
+ from lora_diffusion import patch_pipe, tune_lora_scale
9
+ from pydash import chain
10
+
11
+ from internals.data.dataAccessor import getStyles
12
+ from internals.util.commons import download_file
13
+
14
+
15
+ class LoraStyle:
16
+ class LoraPatcher:
17
+ def __init__(self, pipe, style: Dict[str, Any]):
18
+ self.__style = style
19
+ self.pipe = pipe
20
+
21
+ @torch.inference_mode()
22
+ def patch(self):
23
+ path = self.__style["path"]
24
+ if str(path).endswith((".pt", ".safetensors")):
25
+ patch_pipe(self.pipe, self.__style["path"])
26
+ tune_lora_scale(self.pipe.unet, self.__style["weight"])
27
+ tune_lora_scale(self.pipe.text_encoder, self.__style["weight"])
28
+
29
+ def kwargs(self):
30
+ return {}
31
+
32
+ def cleanup(self):
33
+ tune_lora_scale(self.pipe.unet, 0.0)
34
+ tune_lora_scale(self.pipe.text_encoder, 0.0)
35
+ pass
36
+
37
+ class EmptyLoraPatcher:
38
+ def __init__(self, pipe):
39
+ self.pipe = pipe
40
+
41
+ def patch(self):
42
+ "Patch will act as cleanup, to tune down any corrupted lora"
43
+ self.cleanup()
44
+ pass
45
+
46
+ def kwargs(self):
47
+ return {}
48
+
49
+ def cleanup(self):
50
+ tune_lora_scale(self.pipe.unet, 0.0)
51
+ tune_lora_scale(self.pipe.text_encoder, 0.0)
52
+ pass
53
+
54
+ def load(self, model_dir: str):
55
+ self.model = model_dir
56
+ self.fetch_styles()
57
+
58
+ def fetch_styles(self):
59
+ model_dir = self.model
60
+ result = getStyles()
61
+ if result is not None:
62
+ self.__styles = self.__parse_styles(model_dir, result["data"])
63
+ else:
64
+ self.__styles = self.__get_default_styles(model_dir)
65
+ self.__verify()
66
+
67
+ def prepend_style_to_prompt(self, prompt: str, key: str) -> str:
68
+ if key in self.__styles:
69
+ style = self.__styles[key]
70
+ return f"{', '.join(style['text'])}, {prompt}"
71
+ return prompt
72
+
73
+ def get_patcher(self, pipe, key: str) -> Union[LoraPatcher, EmptyLoraPatcher]:
74
+ if key in self.__styles:
75
+ style = self.__styles[key]
76
+ return self.LoraPatcher(pipe, style)
77
+ return self.EmptyLoraPatcher(pipe)
78
+
79
+ def __parse_styles(self, model_dir: str, data: List[Dict]) -> Dict:
80
+ styles = {}
81
+ download_dir = Path(Path.home() / ".cache" / "lora")
82
+ download_dir.mkdir(exist_ok=True)
83
+ data = chain(data).uniq_by(lambda x: x["tag"]).value()
84
+ for item in data:
85
+ if item["attributes"] is not None:
86
+ attr = json.loads(item["attributes"])
87
+ if "path" in attr:
88
+ file_path = Path(download_dir / attr["path"].split("/")[-1])
89
+
90
+ if not file_path.exists():
91
+ s3_uri = attr["path"]
92
+ download_file(s3_uri, file_path)
93
+
94
+ styles[item["tag"]] = {
95
+ "path": str(file_path),
96
+ "weight": attr["weight"],
97
+ "type": attr["type"],
98
+ "text": attr["text"],
99
+ "negativePrompt": attr["negativePrompt"],
100
+ }
101
+ if len(styles) == 0:
102
+ return self.__get_default_styles(model_dir)
103
+ return styles
104
+
105
+ def __get_default_styles(self, model_dir: str) -> Dict:
106
+ return {
107
+ "nq6akX1CIp": {
108
+ "path": model_dir + "/laur_style/nq6akX1CIp/final_lora.safetensors",
109
+ "text": ["nq6akX1CIp style"],
110
+ "weight": 0.5,
111
+ "negativePrompt": [""],
112
+ "type": "custom",
113
+ },
114
+ "ghibli": {
115
+ "path": model_dir + "/laur_style/nq6akX1CIp/ghibli.bin",
116
+ "text": ["ghibli style"],
117
+ "weight": 1,
118
+ "negativePrompt": [""],
119
+ "type": "custom",
120
+ },
121
+ "eQAmnK2kB2": {
122
+ "path": model_dir + "/laur_style/eQAmnK2kB2/final_lora.safetensors",
123
+ "text": ["eQAmnK2kB2 style"],
124
+ "weight": 0.5,
125
+ "negativePrompt": [""],
126
+ "type": "custom",
127
+ },
128
+ "to8contrast": {
129
+ "path": model_dir + "/laur_style/rpjgusOgqD/final_lora.bin",
130
+ "text": ["to8contrast style"],
131
+ "weight": 0.5,
132
+ "negativePrompt": [""],
133
+ "type": "custom",
134
+ },
135
+ "sfrrfz8vge": {
136
+ "path": model_dir + "/laur_style/replicate/sfrrfz8vge.safetensors",
137
+ "text": ["sfrrfz8vge style"],
138
+ "weight": 1.2,
139
+ "negativePrompt": [""],
140
+ "type": "custom",
141
+ },
142
+ }
143
+
144
+ def __verify(self):
145
+ "A method to verify if lora exists within the required path otherwise throw error"
146
+
147
+ for item in self.__styles.keys():
148
+ if not os.path.exists(self.__styles[item]["path"]):
149
+ raise Exception(
150
+ "Lora style model "
151
+ + item
152
+ + " not found at path: "
153
+ + self.__styles[item]["path"]
154
+ )
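
A hedged sketch of patching a diffusers pipeline with one of these styles; the model paths are placeholders and the "ghibli" key comes from the default style table above.

```
from diffusers import StableDiffusionPipeline

from internals.util.lora_style import LoraStyle

pipe = StableDiffusionPipeline.from_pretrained("/path/to/model")   # placeholder base model

lora_style = LoraStyle()
lora_style.load(model_dir="/path/to/model")          # fetches styles and downloads LoRA files

prompt = lora_style.prepend_style_to_prompt("a boy reading a comic", "ghibli")
patcher = lora_style.get_patcher(pipe, "ghibli")     # unknown keys fall back to EmptyLoraPatcher
patcher.patch()
try:
    ...                                              # pipe(prompt=prompt, ...)
finally:
    patcher.cleanup()                                # tunes the LoRA scale back to 0
```
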
internals/util/slack.py ADDED
@@ -0,0 +1,58 @@
1
+ from time import sleep
2
+ from typing import Optional
3
+
4
+ import requests
5
+
6
+ from internals.data.task import Task
7
+ from internals.util.config import get_environment
8
+
9
+
10
+ class Slack:
11
+ def __init__(self):
12
+ # self.webhook_url = "https://hooks.slack.com/services/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"
13
+ self.webhook_url = "https://hooks.slack.com/services/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"
14
+ self.error_webhook = "https://hooks.slack.com/services/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"
15
+
16
+ def send_alert(self, task: Task, args: Optional[dict]):
17
+ raw = task.get_raw().copy()
18
+
19
+ raw["environment"] = get_environment()
20
+ raw.pop("queue_name", None)
21
+ raw.pop("attempt", None)
22
+ raw.pop("timestamp", None)
23
+ raw.pop("task_id", None)
24
+ raw.pop("maskImageUrl", None)
25
+
26
+ if args is not None:
27
+ raw.update(args.items())
28
+
29
+ message = ""
30
+ for key, value in raw.items():
31
+ if value:
32
+ if type(value) == list:
33
+ message += f"*{key}*: {', '.join(value)}\n"
34
+ else:
35
+ message += f"*{key}*: {value}\n"
36
+
37
+ requests.post(
38
+ self.webhook_url,
39
+ headers={"Content-Type": "application/json"},
40
+ json={"text": message},
41
+ )
42
+
43
+ def error_alert(self, task: Task, e: Exception):
44
+ requests.post(
45
+ self.error_webhook,
46
+ headers={"Content-Type": "application/json"},
47
+ json={
48
+ "text": "Task failed:\n{} \n error is: \n {}".format(task.get_raw(), e)
49
+ },
50
+ )
51
+
52
+ def auto_send_alert(self, func):
53
+ def inner(*args, **kwargs):
54
+ rargs = func(*args, **kwargs)
55
+ self.send_alert(args[0], rargs)
56
+ return rargs
57
+
58
+ return inner
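
Sketch of the decorator usage: `auto_send_alert` expects the wrapped function to take the `Task` as its first argument and to return a dict of extra fields to fold into the Slack message (the function and field names below are illustrative).

```
from internals.util.slack import Slack

slack = Slack()

@slack.auto_send_alert
def generate(task):
    ...                                   # run the pipelines
    return {"generatedImages": ["https://example.com/0.png"]}
```
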
models/ade20k/.DS_Store ADDED
Binary file (6.15 kB). View file
 
models/ade20k/__init__.py ADDED
@@ -0,0 +1 @@
1
+ from .base import *
models/ade20k/base.py ADDED
@@ -0,0 +1,627 @@
1
+ """Modified from https://github.com/CSAILVision/semantic-segmentation-pytorch"""
2
+
3
+ import os
4
+
5
+ import pandas as pd
6
+ import torch
7
+ import torch.nn as nn
8
+ import torch.nn.functional as F
9
+ from scipy.io import loadmat
10
+ from torch.nn.modules import BatchNorm2d
11
+
12
+ from . import resnet
13
+ from . import mobilenet
14
+
15
+
16
+ NUM_CLASS = 150
17
+ base_path = os.path.dirname(os.path.abspath(__file__)) # current file path
18
+ colors_path = os.path.join(base_path, 'color150.mat')
19
+ classes_path = os.path.join(base_path, 'object150_info.csv')
20
+
21
+ segm_options = dict(colors=loadmat(colors_path)['colors'],
22
+ classes=pd.read_csv(classes_path),)
23
+
24
+
25
+ class NormalizeTensor:
26
+ def __init__(self, mean, std, inplace=False):
27
+ """Normalize a tensor image with mean and standard deviation.
28
+ .. note::
29
+ This transform acts out of place by default, i.e., it does not mutate the input tensor.
30
+ See :class:`~torchvision.transforms.Normalize` for more details.
31
+ Args:
32
+ tensor (Tensor): Tensor image of size (C, H, W) to be normalized.
33
+ mean (sequence): Sequence of means for each channel.
34
+ std (sequence): Sequence of standard deviations for each channel.
35
+ inplace(bool,optional): Bool to make this operation inplace.
36
+ Returns:
37
+ Tensor: Normalized Tensor image.
38
+ """
39
+
40
+ self.mean = mean
41
+ self.std = std
42
+ self.inplace = inplace
43
+
44
+ def __call__(self, tensor):
45
+ if not self.inplace:
46
+ tensor = tensor.clone()
47
+
48
+ dtype = tensor.dtype
49
+ mean = torch.as_tensor(self.mean, dtype=dtype, device=tensor.device)
50
+ std = torch.as_tensor(self.std, dtype=dtype, device=tensor.device)
51
+ tensor.sub_(mean[None, :, None, None]).div_(std[None, :, None, None])
52
+ return tensor
53
+
54
+
55
+ # Model Builder
56
+ class ModelBuilder:
57
+ # custom weights initialization
58
+ @staticmethod
59
+ def weights_init(m):
60
+ classname = m.__class__.__name__
61
+ if classname.find('Conv') != -1:
62
+ nn.init.kaiming_normal_(m.weight.data)
63
+ elif classname.find('BatchNorm') != -1:
64
+ m.weight.data.fill_(1.)
65
+ m.bias.data.fill_(1e-4)
66
+
67
+ @staticmethod
68
+ def build_encoder(arch='resnet50dilated', fc_dim=512, weights=''):
69
+ pretrained = True if len(weights) == 0 else False
70
+ arch = arch.lower()
71
+ if arch == 'mobilenetv2dilated':
72
+ orig_mobilenet = mobilenet.__dict__['mobilenetv2'](pretrained=pretrained)
73
+ net_encoder = MobileNetV2Dilated(orig_mobilenet, dilate_scale=8)
74
+ elif arch == 'resnet18':
75
+ orig_resnet = resnet.__dict__['resnet18'](pretrained=pretrained)
76
+ net_encoder = Resnet(orig_resnet)
77
+ elif arch == 'resnet18dilated':
78
+ orig_resnet = resnet.__dict__['resnet18'](pretrained=pretrained)
79
+ net_encoder = ResnetDilated(orig_resnet, dilate_scale=8)
80
+ elif arch == 'resnet50dilated':
81
+ orig_resnet = resnet.__dict__['resnet50'](pretrained=pretrained)
82
+ net_encoder = ResnetDilated(orig_resnet, dilate_scale=8)
83
+ elif arch == 'resnet50':
84
+ orig_resnet = resnet.__dict__['resnet50'](pretrained=pretrained)
85
+ net_encoder = Resnet(orig_resnet)
86
+ else:
87
+ raise Exception('Architecture undefined!')
88
+
89
+ # encoders are usually pretrained
90
+ # net_encoder.apply(ModelBuilder.weights_init)
91
+ if len(weights) > 0:
92
+ print('Loading weights for net_encoder')
93
+ net_encoder.load_state_dict(
94
+ torch.load(weights, map_location=lambda storage, loc: storage), strict=False)
95
+ return net_encoder
96
+
97
+ @staticmethod
98
+ def build_decoder(arch='ppm_deepsup',
99
+ fc_dim=512, num_class=NUM_CLASS,
100
+ weights='', use_softmax=False, drop_last_conv=False):
101
+ arch = arch.lower()
102
+ if arch == 'ppm_deepsup':
103
+ net_decoder = PPMDeepsup(
104
+ num_class=num_class,
105
+ fc_dim=fc_dim,
106
+ use_softmax=use_softmax,
107
+ drop_last_conv=drop_last_conv)
108
+ elif arch == 'c1_deepsup':
109
+ net_decoder = C1DeepSup(
110
+ num_class=num_class,
111
+ fc_dim=fc_dim,
112
+ use_softmax=use_softmax,
113
+ drop_last_conv=drop_last_conv)
114
+ else:
115
+ raise Exception('Architecture undefined!')
116
+
117
+ net_decoder.apply(ModelBuilder.weights_init)
118
+ if len(weights) > 0:
119
+ print('Loading weights for net_decoder')
120
+ net_decoder.load_state_dict(
121
+ torch.load(weights, map_location=lambda storage, loc: storage), strict=False)
122
+ return net_decoder
123
+
124
+ @staticmethod
125
+ def get_decoder(weights_path, arch_encoder, arch_decoder, fc_dim, drop_last_conv, *args, **kwargs):
126
+ path = os.path.join(weights_path, 'ade20k', f'ade20k-{arch_encoder}-{arch_decoder}/decoder_epoch_20.pth')
127
+ return ModelBuilder.build_decoder(arch=arch_decoder, fc_dim=fc_dim, weights=path, use_softmax=True, drop_last_conv=drop_last_conv)
128
+
129
+ @staticmethod
130
+ def get_encoder(weights_path, arch_encoder, arch_decoder, fc_dim, segmentation,
131
+ *args, **kwargs):
132
+ if segmentation:
133
+ path = os.path.join(weights_path, 'ade20k', f'ade20k-{arch_encoder}-{arch_decoder}/encoder_epoch_20.pth')
134
+ else:
135
+ path = ''
136
+ return ModelBuilder.build_encoder(arch=arch_encoder, fc_dim=fc_dim, weights=path)
137
+
138
+
139
+ def conv3x3_bn_relu(in_planes, out_planes, stride=1):
140
+ return nn.Sequential(
141
+ nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, padding=1, bias=False),
142
+ BatchNorm2d(out_planes),
143
+ nn.ReLU(inplace=True),
144
+ )
145
+
146
+
147
+ class SegmentationModule(nn.Module):
148
+ def __init__(self,
149
+ weights_path,
150
+ num_classes=150,
151
+ arch_encoder="resnet50dilated",
152
+ drop_last_conv=False,
153
+ net_enc=None, # None for Default encoder
154
+ net_dec=None, # None for Default decoder
155
+ encode=None, # {None, 'binary', 'color', 'sky'}
156
+ use_default_normalization=False,
157
+ return_feature_maps=False,
158
+ return_feature_maps_level=3, # {0, 1, 2, 3}
159
+ return_feature_maps_only=True,
160
+ **kwargs,
161
+ ):
162
+ super().__init__()
163
+ self.weights_path = weights_path
164
+ self.drop_last_conv = drop_last_conv
165
+ self.arch_encoder = arch_encoder
166
+ if self.arch_encoder == "resnet50dilated":
167
+ self.arch_decoder = "ppm_deepsup"
168
+ self.fc_dim = 2048
169
+ elif self.arch_encoder == "mobilenetv2dilated":
170
+ self.arch_decoder = "c1_deepsup"
171
+ self.fc_dim = 320
172
+ else:
173
+ raise NotImplementedError(f"No such arch_encoder={self.arch_encoder}")
174
+ model_builder_kwargs = dict(arch_encoder=self.arch_encoder,
175
+ arch_decoder=self.arch_decoder,
176
+ fc_dim=self.fc_dim,
177
+ drop_last_conv=drop_last_conv,
178
+ weights_path=self.weights_path)
179
+
180
+ self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
181
+ self.encoder = ModelBuilder.get_encoder(**model_builder_kwargs) if net_enc is None else net_enc
182
+ self.decoder = ModelBuilder.get_decoder(**model_builder_kwargs) if net_dec is None else net_dec
183
+ self.use_default_normalization = use_default_normalization
184
+ self.default_normalization = NormalizeTensor(mean=[0.485, 0.456, 0.406],
185
+ std=[0.229, 0.224, 0.225])
186
+
187
+ self.encode = encode
188
+
189
+ self.return_feature_maps = return_feature_maps
190
+
191
+ assert 0 <= return_feature_maps_level <= 3
192
+ self.return_feature_maps_level = return_feature_maps_level
193
+
194
+ def normalize_input(self, tensor):
195
+ if tensor.min() < 0 or tensor.max() > 1:
196
+ raise ValueError("Tensor should be 0..1 before using normalize_input")
197
+ return self.default_normalization(tensor)
198
+
199
+ @property
200
+ def feature_maps_channels(self):
201
+ return 256 * 2**(self.return_feature_maps_level) # 256, 512, 1024, 2048
202
+
203
+ def forward(self, img_data, segSize=None):
204
+ if segSize is None:
205
+ raise NotImplementedError("Please pass segSize param. By default: (300, 300)")
206
+
207
+ fmaps = self.encoder(img_data, return_feature_maps=True)
208
+ pred = self.decoder(fmaps, segSize=segSize)
209
+
210
+ if self.return_feature_maps:
211
+ return pred, fmaps
212
+ # print("BINARY", img_data.shape, pred.shape)
213
+ return pred
214
+
215
+ def multi_mask_from_multiclass(self, pred, classes):
216
+ def isin(ar1, ar2):
217
+ return (ar1[..., None] == ar2).any(-1).float()
218
+ return isin(pred, torch.LongTensor(classes).to(self.device))
219
+
220
+ @staticmethod
221
+ def multi_mask_from_multiclass_probs(scores, classes):
222
+ res = None
223
+ for c in classes:
224
+ if res is None:
225
+ res = scores[:, c]
226
+ else:
227
+ res += scores[:, c]
228
+ return res
229
+
230
+ def predict(self, tensor, imgSizes=(-1,), # (300, 375, 450, 525, 600)
231
+ segSize=None):
232
+ """Entry-point for segmentation. Use this methods instead of forward
233
+ Arguments:
234
+ tensor {torch.Tensor} -- BCHW
235
+ Keyword Arguments:
236
+ imgSizes {tuple or list} -- imgSizes for segmentation input.
237
+ default: (300, 450)
238
+ original implementation: (300, 375, 450, 525, 600)
239
+
240
+ """
241
+ if segSize is None:
242
+ segSize = tensor.shape[-2:]
243
+ segSize = (tensor.shape[2], tensor.shape[3])
244
+ with torch.no_grad():
245
+ if self.use_default_normalization:
246
+ tensor = self.normalize_input(tensor)
247
+ scores = torch.zeros(1, NUM_CLASS, segSize[0], segSize[1]).to(self.device)
248
+ features = torch.zeros(1, self.feature_maps_channels, segSize[0], segSize[1]).to(self.device)
249
+
250
+ result = []
251
+ for img_size in imgSizes:
252
+ if img_size != -1:
253
+ img_data = F.interpolate(tensor.clone(), size=img_size)
254
+ else:
255
+ img_data = tensor.clone()
256
+
257
+ if self.return_feature_maps:
258
+ pred_current, fmaps = self.forward(img_data, segSize=segSize)
259
+ else:
260
+ pred_current = self.forward(img_data, segSize=segSize)
261
+
262
+
263
+ result.append(pred_current)
264
+ scores = scores + pred_current / len(imgSizes)
265
+
266
+ # Disclaimer: We use and aggregate only last fmaps: fmaps[3]
267
+ if self.return_feature_maps:
268
+ features = features + F.interpolate(fmaps[self.return_feature_maps_level], size=segSize) / len(imgSizes)
269
+
270
+ _, pred = torch.max(scores, dim=1)
271
+
272
+ if self.return_feature_maps:
273
+ return features
274
+
275
+ return pred, result
276
+
277
+ def get_edges(self, t):
278
+ edge = torch.cuda.ByteTensor(t.size()).zero_()
279
+ edge[:, :, :, 1:] = edge[:, :, :, 1:] | (t[:, :, :, 1:] != t[:, :, :, :-1])
280
+ edge[:, :, :, :-1] = edge[:, :, :, :-1] | (t[:, :, :, 1:] != t[:, :, :, :-1])
281
+ edge[:, :, 1:, :] = edge[:, :, 1:, :] | (t[:, :, 1:, :] != t[:, :, :-1, :])
282
+ edge[:, :, :-1, :] = edge[:, :, :-1, :] | (t[:, :, 1:, :] != t[:, :, :-1, :])
283
+
284
+ if True:
285
+ return edge.half()
286
+ return edge.float()
287
+
288
+
289
+ # pyramid pooling, deep supervision
290
+ class PPMDeepsup(nn.Module):
291
+ def __init__(self, num_class=NUM_CLASS, fc_dim=4096,
292
+ use_softmax=False, pool_scales=(1, 2, 3, 6),
293
+ drop_last_conv=False):
294
+ super().__init__()
295
+ self.use_softmax = use_softmax
296
+ self.drop_last_conv = drop_last_conv
297
+
298
+ self.ppm = []
299
+ for scale in pool_scales:
300
+ self.ppm.append(nn.Sequential(
301
+ nn.AdaptiveAvgPool2d(scale),
302
+ nn.Conv2d(fc_dim, 512, kernel_size=1, bias=False),
303
+ BatchNorm2d(512),
304
+ nn.ReLU(inplace=True)
305
+ ))
306
+ self.ppm = nn.ModuleList(self.ppm)
307
+ self.cbr_deepsup = conv3x3_bn_relu(fc_dim // 2, fc_dim // 4, 1)
308
+
309
+ self.conv_last = nn.Sequential(
310
+ nn.Conv2d(fc_dim + len(pool_scales) * 512, 512,
311
+ kernel_size=3, padding=1, bias=False),
312
+ BatchNorm2d(512),
313
+ nn.ReLU(inplace=True),
314
+ nn.Dropout2d(0.1),
315
+ nn.Conv2d(512, num_class, kernel_size=1)
316
+ )
317
+ self.conv_last_deepsup = nn.Conv2d(fc_dim // 4, num_class, 1, 1, 0)
318
+ self.dropout_deepsup = nn.Dropout2d(0.1)
319
+
320
+ def forward(self, conv_out, segSize=None):
321
+ conv5 = conv_out[-1]
322
+
323
+ input_size = conv5.size()
324
+ ppm_out = [conv5]
325
+ for pool_scale in self.ppm:
326
+ ppm_out.append(nn.functional.interpolate(
327
+ pool_scale(conv5),
328
+ (input_size[2], input_size[3]),
329
+ mode='bilinear', align_corners=False))
330
+ ppm_out = torch.cat(ppm_out, 1)
331
+
332
+ if self.drop_last_conv:
333
+ return ppm_out
334
+ else:
335
+ x = self.conv_last(ppm_out)
336
+
337
+ if self.use_softmax: # is True during inference
338
+ x = nn.functional.interpolate(
339
+ x, size=segSize, mode='bilinear', align_corners=False)
340
+ x = nn.functional.softmax(x, dim=1)
341
+ return x
342
+
343
+ # deep sup
344
+ conv4 = conv_out[-2]
345
+ _ = self.cbr_deepsup(conv4)
346
+ _ = self.dropout_deepsup(_)
347
+ _ = self.conv_last_deepsup(_)
348
+
349
+ x = nn.functional.log_softmax(x, dim=1)
350
+ _ = nn.functional.log_softmax(_, dim=1)
351
+
352
+ return (x, _)
353
+
354
+
355
+ class Resnet(nn.Module):
356
+ def __init__(self, orig_resnet):
357
+ super(Resnet, self).__init__()
358
+
359
+ # take pretrained resnet, except AvgPool and FC
360
+ self.conv1 = orig_resnet.conv1
361
+ self.bn1 = orig_resnet.bn1
362
+ self.relu1 = orig_resnet.relu1
363
+ self.conv2 = orig_resnet.conv2
364
+ self.bn2 = orig_resnet.bn2
365
+ self.relu2 = orig_resnet.relu2
366
+ self.conv3 = orig_resnet.conv3
367
+ self.bn3 = orig_resnet.bn3
368
+ self.relu3 = orig_resnet.relu3
369
+ self.maxpool = orig_resnet.maxpool
370
+ self.layer1 = orig_resnet.layer1
371
+ self.layer2 = orig_resnet.layer2
372
+ self.layer3 = orig_resnet.layer3
373
+ self.layer4 = orig_resnet.layer4
374
+
375
+ def forward(self, x, return_feature_maps=False):
376
+ conv_out = []
377
+
378
+ x = self.relu1(self.bn1(self.conv1(x)))
379
+ x = self.relu2(self.bn2(self.conv2(x)))
380
+ x = self.relu3(self.bn3(self.conv3(x)))
381
+ x = self.maxpool(x)
382
+
383
+ x = self.layer1(x); conv_out.append(x);
384
+ x = self.layer2(x); conv_out.append(x);
385
+ x = self.layer3(x); conv_out.append(x);
386
+ x = self.layer4(x); conv_out.append(x);
387
+
388
+ if return_feature_maps:
389
+ return conv_out
390
+ return [x]
391
+
392
+ # Resnet Dilated
393
+ class ResnetDilated(nn.Module):
394
+ def __init__(self, orig_resnet, dilate_scale=8):
395
+ super().__init__()
396
+ from functools import partial
397
+
398
+ if dilate_scale == 8:
399
+ orig_resnet.layer3.apply(
400
+ partial(self._nostride_dilate, dilate=2))
401
+ orig_resnet.layer4.apply(
402
+ partial(self._nostride_dilate, dilate=4))
403
+ elif dilate_scale == 16:
404
+ orig_resnet.layer4.apply(
405
+ partial(self._nostride_dilate, dilate=2))
406
+
407
+ # take pretrained resnet, except AvgPool and FC
408
+ self.conv1 = orig_resnet.conv1
409
+ self.bn1 = orig_resnet.bn1
410
+ self.relu1 = orig_resnet.relu1
411
+ self.conv2 = orig_resnet.conv2
412
+ self.bn2 = orig_resnet.bn2
413
+ self.relu2 = orig_resnet.relu2
414
+ self.conv3 = orig_resnet.conv3
415
+ self.bn3 = orig_resnet.bn3
416
+ self.relu3 = orig_resnet.relu3
417
+ self.maxpool = orig_resnet.maxpool
418
+ self.layer1 = orig_resnet.layer1
419
+ self.layer2 = orig_resnet.layer2
420
+ self.layer3 = orig_resnet.layer3
421
+ self.layer4 = orig_resnet.layer4
422
+
423
+ def _nostride_dilate(self, m, dilate):
424
+ classname = m.__class__.__name__
425
+ if classname.find('Conv') != -1:
426
+ # the convolution with stride
427
+ if m.stride == (2, 2):
428
+ m.stride = (1, 1)
429
+ if m.kernel_size == (3, 3):
430
+ m.dilation = (dilate // 2, dilate // 2)
431
+ m.padding = (dilate // 2, dilate // 2)
432
+ # other convolutions
433
+ else:
434
+ if m.kernel_size == (3, 3):
435
+ m.dilation = (dilate, dilate)
436
+ m.padding = (dilate, dilate)
437
+
438
+ def forward(self, x, return_feature_maps=False):
439
+ conv_out = []
440
+
441
+ x = self.relu1(self.bn1(self.conv1(x)))
442
+ x = self.relu2(self.bn2(self.conv2(x)))
443
+ x = self.relu3(self.bn3(self.conv3(x)))
444
+ x = self.maxpool(x)
445
+
446
+ x = self.layer1(x)
447
+ conv_out.append(x)
448
+ x = self.layer2(x)
449
+ conv_out.append(x)
450
+ x = self.layer3(x)
451
+ conv_out.append(x)
452
+ x = self.layer4(x)
453
+ conv_out.append(x)
454
+
455
+ if return_feature_maps:
456
+ return conv_out
457
+ return [x]
458
+
459
+ class MobileNetV2Dilated(nn.Module):
460
+ def __init__(self, orig_net, dilate_scale=8):
461
+ super(MobileNetV2Dilated, self).__init__()
462
+ from functools import partial
463
+
464
+ # take pretrained mobilenet features
465
+ self.features = orig_net.features[:-1]
466
+
467
+ self.total_idx = len(self.features)
468
+ self.down_idx = [2, 4, 7, 14]
469
+
470
+ if dilate_scale == 8:
471
+ for i in range(self.down_idx[-2], self.down_idx[-1]):
472
+ self.features[i].apply(
473
+ partial(self._nostride_dilate, dilate=2)
474
+ )
475
+ for i in range(self.down_idx[-1], self.total_idx):
476
+ self.features[i].apply(
477
+ partial(self._nostride_dilate, dilate=4)
478
+ )
479
+ elif dilate_scale == 16:
480
+ for i in range(self.down_idx[-1], self.total_idx):
481
+ self.features[i].apply(
482
+ partial(self._nostride_dilate, dilate=2)
483
+ )
484
+
485
+ def _nostride_dilate(self, m, dilate):
486
+ classname = m.__class__.__name__
487
+ if classname.find('Conv') != -1:
488
+ # the convolution with stride
489
+ if m.stride == (2, 2):
490
+ m.stride = (1, 1)
491
+ if m.kernel_size == (3, 3):
492
+ m.dilation = (dilate//2, dilate//2)
493
+ m.padding = (dilate//2, dilate//2)
494
+ # other convolutions
495
+ else:
496
+ if m.kernel_size == (3, 3):
497
+ m.dilation = (dilate, dilate)
498
+ m.padding = (dilate, dilate)
499
+
500
+ def forward(self, x, return_feature_maps=False):
501
+ if return_feature_maps:
502
+ conv_out = []
503
+ for i in range(self.total_idx):
504
+ x = self.features[i](x)
505
+ if i in self.down_idx:
506
+ conv_out.append(x)
507
+ conv_out.append(x)
508
+ return conv_out
509
+
510
+ else:
511
+ return [self.features(x)]
512
+
513
+
514
+ # last conv, deep supervision
515
+ class C1DeepSup(nn.Module):
516
+ def __init__(self, num_class=150, fc_dim=2048, use_softmax=False, drop_last_conv=False):
517
+ super(C1DeepSup, self).__init__()
518
+ self.use_softmax = use_softmax
519
+ self.drop_last_conv = drop_last_conv
520
+
521
+ self.cbr = conv3x3_bn_relu(fc_dim, fc_dim // 4, 1)
522
+ self.cbr_deepsup = conv3x3_bn_relu(fc_dim // 2, fc_dim // 4, 1)
523
+
524
+ # last conv
525
+ self.conv_last = nn.Conv2d(fc_dim // 4, num_class, 1, 1, 0)
526
+ self.conv_last_deepsup = nn.Conv2d(fc_dim // 4, num_class, 1, 1, 0)
527
+
528
+ def forward(self, conv_out, segSize=None):
529
+ conv5 = conv_out[-1]
530
+
531
+ x = self.cbr(conv5)
532
+
533
+ if self.drop_last_conv:
534
+ return x
535
+ else:
536
+ x = self.conv_last(x)
537
+
538
+ if self.use_softmax: # is True during inference
539
+ x = nn.functional.interpolate(
540
+ x, size=segSize, mode='bilinear', align_corners=False)
541
+ x = nn.functional.softmax(x, dim=1)
542
+ return x
543
+
544
+ # deep sup
545
+ conv4 = conv_out[-2]
546
+ _ = self.cbr_deepsup(conv4)
547
+ _ = self.conv_last_deepsup(_)
548
+
549
+ x = nn.functional.log_softmax(x, dim=1)
550
+ _ = nn.functional.log_softmax(_, dim=1)
551
+
552
+ return (x, _)
553
+
554
+
555
+ # last conv
556
+ class C1(nn.Module):
557
+ def __init__(self, num_class=150, fc_dim=2048, use_softmax=False):
558
+ super(C1, self).__init__()
559
+ self.use_softmax = use_softmax
560
+
561
+ self.cbr = conv3x3_bn_relu(fc_dim, fc_dim // 4, 1)
562
+
563
+ # last conv
564
+ self.conv_last = nn.Conv2d(fc_dim // 4, num_class, 1, 1, 0)
565
+
566
+ def forward(self, conv_out, segSize=None):
567
+ conv5 = conv_out[-1]
568
+ x = self.cbr(conv5)
569
+ x = self.conv_last(x)
570
+
571
+ if self.use_softmax: # is True during inference
572
+ x = nn.functional.interpolate(
573
+ x, size=segSize, mode='bilinear', align_corners=False)
574
+ x = nn.functional.softmax(x, dim=1)
575
+ else:
576
+ x = nn.functional.log_softmax(x, dim=1)
577
+
578
+ return x
579
+
580
+
581
+ # pyramid pooling
582
+ class PPM(nn.Module):
583
+ def __init__(self, num_class=150, fc_dim=4096,
584
+ use_softmax=False, pool_scales=(1, 2, 3, 6)):
585
+ super(PPM, self).__init__()
586
+ self.use_softmax = use_softmax
587
+
588
+ self.ppm = []
589
+ for scale in pool_scales:
590
+ self.ppm.append(nn.Sequential(
591
+ nn.AdaptiveAvgPool2d(scale),
592
+ nn.Conv2d(fc_dim, 512, kernel_size=1, bias=False),
593
+ BatchNorm2d(512),
594
+ nn.ReLU(inplace=True)
595
+ ))
596
+ self.ppm = nn.ModuleList(self.ppm)
597
+
598
+ self.conv_last = nn.Sequential(
599
+ nn.Conv2d(fc_dim+len(pool_scales)*512, 512,
600
+ kernel_size=3, padding=1, bias=False),
601
+ BatchNorm2d(512),
602
+ nn.ReLU(inplace=True),
603
+ nn.Dropout2d(0.1),
604
+ nn.Conv2d(512, num_class, kernel_size=1)
605
+ )
606
+
607
+ def forward(self, conv_out, segSize=None):
608
+ conv5 = conv_out[-1]
609
+
610
+ input_size = conv5.size()
611
+ ppm_out = [conv5]
612
+ for pool_scale in self.ppm:
613
+ ppm_out.append(nn.functional.interpolate(
614
+ pool_scale(conv5),
615
+ (input_size[2], input_size[3]),
616
+ mode='bilinear', align_corners=False))
617
+ ppm_out = torch.cat(ppm_out, 1)
618
+
619
+ x = self.conv_last(ppm_out)
620
+
621
+ if self.use_softmax: # is True during inference
622
+ x = nn.functional.interpolate(
623
+ x, size=segSize, mode='bilinear', align_corners=False)
624
+ x = nn.functional.softmax(x, dim=1)
625
+ else:
626
+ x = nn.functional.log_softmax(x, dim=1)
627
+ return x
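
To make the expected call pattern concrete, here is a hedged sketch of running the module on one image; the weights directory is a placeholder and must contain the `ade20k/ade20k-resnet50dilated-ppm_deepsup/{encoder,decoder}_epoch_20.pth` files implied by `ModelBuilder`.

```
import torch

from models.ade20k import SegmentationModule

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = SegmentationModule(weights_path="/path/to/weights").to(device).eval()

image = torch.rand(1, 3, 512, 512, device=device)    # BCHW, values in 0..1
with torch.no_grad():
    pred, _ = model.predict(image)                    # per-pixel ADE20k class indices, shape (1, 512, 512)
```
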
models/ade20k/color150.mat ADDED
Binary file (502 Bytes). View file
 
models/ade20k/mobilenet.py ADDED
@@ -0,0 +1,154 @@
1
+ """
2
+ This MobileNetV2 implementation is modified from the following repository:
3
+ https://github.com/tonylins/pytorch-mobilenet-v2
4
+ """
5
+
6
+ import torch.nn as nn
7
+ import math
8
+ from .utils import load_url
9
+ from .segm_lib.nn import SynchronizedBatchNorm2d
10
+
11
+ BatchNorm2d = SynchronizedBatchNorm2d
12
+
13
+
14
+ __all__ = ['mobilenetv2']
15
+
16
+
17
+ model_urls = {
18
+ 'mobilenetv2': 'http://sceneparsing.csail.mit.edu/model/pretrained_resnet/mobilenet_v2.pth.tar',
19
+ }
20
+
21
+
22
+ def conv_bn(inp, oup, stride):
23
+ return nn.Sequential(
24
+ nn.Conv2d(inp, oup, 3, stride, 1, bias=False),
25
+ BatchNorm2d(oup),
26
+ nn.ReLU6(inplace=True)
27
+ )
28
+
29
+
30
+ def conv_1x1_bn(inp, oup):
31
+ return nn.Sequential(
32
+ nn.Conv2d(inp, oup, 1, 1, 0, bias=False),
33
+ BatchNorm2d(oup),
34
+ nn.ReLU6(inplace=True)
35
+ )
36
+
37
+
38
+ class InvertedResidual(nn.Module):
39
+ def __init__(self, inp, oup, stride, expand_ratio):
40
+ super(InvertedResidual, self).__init__()
41
+ self.stride = stride
42
+ assert stride in [1, 2]
43
+
44
+ hidden_dim = round(inp * expand_ratio)
45
+ self.use_res_connect = self.stride == 1 and inp == oup
46
+
47
+ if expand_ratio == 1:
48
+ self.conv = nn.Sequential(
49
+ # dw
50
+ nn.Conv2d(hidden_dim, hidden_dim, 3, stride, 1, groups=hidden_dim, bias=False),
51
+ BatchNorm2d(hidden_dim),
52
+ nn.ReLU6(inplace=True),
53
+ # pw-linear
54
+ nn.Conv2d(hidden_dim, oup, 1, 1, 0, bias=False),
55
+ BatchNorm2d(oup),
56
+ )
57
+ else:
58
+ self.conv = nn.Sequential(
59
+ # pw
60
+ nn.Conv2d(inp, hidden_dim, 1, 1, 0, bias=False),
61
+ BatchNorm2d(hidden_dim),
62
+ nn.ReLU6(inplace=True),
63
+ # dw
64
+ nn.Conv2d(hidden_dim, hidden_dim, 3, stride, 1, groups=hidden_dim, bias=False),
65
+ BatchNorm2d(hidden_dim),
66
+ nn.ReLU6(inplace=True),
67
+ # pw-linear
68
+ nn.Conv2d(hidden_dim, oup, 1, 1, 0, bias=False),
69
+ BatchNorm2d(oup),
70
+ )
71
+
72
+ def forward(self, x):
73
+ if self.use_res_connect:
74
+ return x + self.conv(x)
75
+ else:
76
+ return self.conv(x)
77
+
78
+
79
+ class MobileNetV2(nn.Module):
80
+ def __init__(self, n_class=1000, input_size=224, width_mult=1.):
81
+ super(MobileNetV2, self).__init__()
82
+ block = InvertedResidual
83
+ input_channel = 32
84
+ last_channel = 1280
85
+ interverted_residual_setting = [
86
+ # t, c, n, s
87
+ [1, 16, 1, 1],
88
+ [6, 24, 2, 2],
89
+ [6, 32, 3, 2],
90
+ [6, 64, 4, 2],
91
+ [6, 96, 3, 1],
92
+ [6, 160, 3, 2],
93
+ [6, 320, 1, 1],
94
+ ]
95
+
96
+ # building first layer
97
+ assert input_size % 32 == 0
98
+ input_channel = int(input_channel * width_mult)
99
+ self.last_channel = int(last_channel * width_mult) if width_mult > 1.0 else last_channel
100
+ self.features = [conv_bn(3, input_channel, 2)]
101
+ # building inverted residual blocks
102
+ for t, c, n, s in interverted_residual_setting:
103
+ output_channel = int(c * width_mult)
104
+ for i in range(n):
105
+ if i == 0:
106
+ self.features.append(block(input_channel, output_channel, s, expand_ratio=t))
107
+ else:
108
+ self.features.append(block(input_channel, output_channel, 1, expand_ratio=t))
109
+ input_channel = output_channel
110
+ # building last several layers
111
+ self.features.append(conv_1x1_bn(input_channel, self.last_channel))
112
+ # make it nn.Sequential
113
+ self.features = nn.Sequential(*self.features)
114
+
115
+ # building classifier
116
+ self.classifier = nn.Sequential(
117
+ nn.Dropout(0.2),
118
+ nn.Linear(self.last_channel, n_class),
119
+ )
120
+
121
+ self._initialize_weights()
122
+
123
+ def forward(self, x):
124
+ x = self.features(x)
125
+ x = x.mean(3).mean(2)
126
+ x = self.classifier(x)
127
+ return x
128
+
129
+ def _initialize_weights(self):
130
+ for m in self.modules():
131
+ if isinstance(m, nn.Conv2d):
132
+ n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
133
+ m.weight.data.normal_(0, math.sqrt(2. / n))
134
+ if m.bias is not None:
135
+ m.bias.data.zero_()
136
+ elif isinstance(m, BatchNorm2d):
137
+ m.weight.data.fill_(1)
138
+ m.bias.data.zero_()
139
+ elif isinstance(m, nn.Linear):
140
+ n = m.weight.size(1)
141
+ m.weight.data.normal_(0, 0.01)
142
+ m.bias.data.zero_()
143
+
144
+
145
+ def mobilenetv2(pretrained=False, **kwargs):
146
+ """Constructs a MobileNet_V2 model.
147
+
148
+ Args:
149
+ pretrained (bool): If True, returns a model pre-trained on ImageNet
150
+ """
151
+ model = MobileNetV2(n_class=1000, **kwargs)
152
+ if pretrained:
153
+ model.load_state_dict(load_url(model_urls['mobilenetv2']), strict=False)
154
+ return model
models/ade20k/object150_info.csv ADDED
@@ -0,0 +1,151 @@
1
+ Idx,Ratio,Train,Val,Stuff,Name
2
+ 1,0.1576,11664,1172,1,wall
3
+ 2,0.1072,6046,612,1,building;edifice
4
+ 3,0.0878,8265,796,1,sky
5
+ 4,0.0621,9336,917,1,floor;flooring
6
+ 5,0.0480,6678,641,0,tree
7
+ 6,0.0450,6604,643,1,ceiling
8
+ 7,0.0398,4023,408,1,road;route
9
+ 8,0.0231,1906,199,0,bed
10
+ 9,0.0198,4688,460,0,windowpane;window
11
+ 10,0.0183,2423,225,1,grass
12
+ 11,0.0181,2874,294,0,cabinet
13
+ 12,0.0166,3068,310,1,sidewalk;pavement
14
+ 13,0.0160,5075,526,0,person;individual;someone;somebody;mortal;soul
15
+ 14,0.0151,1804,190,1,earth;ground
16
+ 15,0.0118,6666,796,0,door;double;door
17
+ 16,0.0110,4269,411,0,table
18
+ 17,0.0109,1691,160,1,mountain;mount
19
+ 18,0.0104,3999,441,0,plant;flora;plant;life
20
+ 19,0.0104,2149,217,0,curtain;drape;drapery;mantle;pall
21
+ 20,0.0103,3261,318,0,chair
22
+ 21,0.0098,3164,306,0,car;auto;automobile;machine;motorcar
23
+ 22,0.0074,709,75,1,water
24
+ 23,0.0067,3296,315,0,painting;picture
25
+ 24,0.0065,1191,106,0,sofa;couch;lounge
26
+ 25,0.0061,1516,162,0,shelf
27
+ 26,0.0060,667,69,1,house
28
+ 27,0.0053,651,57,1,sea
29
+ 28,0.0052,1847,224,0,mirror
30
+ 29,0.0046,1158,128,1,rug;carpet;carpeting
31
+ 30,0.0044,480,44,1,field
32
+ 31,0.0044,1172,98,0,armchair
33
+ 32,0.0044,1292,184,0,seat
34
+ 33,0.0033,1386,138,0,fence;fencing
35
+ 34,0.0031,698,61,0,desk
36
+ 35,0.0030,781,73,0,rock;stone
37
+ 36,0.0027,380,43,0,wardrobe;closet;press
38
+ 37,0.0026,3089,302,0,lamp
39
+ 38,0.0024,404,37,0,bathtub;bathing;tub;bath;tub
40
+ 39,0.0024,804,99,0,railing;rail
41
+ 40,0.0023,1453,153,0,cushion
42
+ 41,0.0023,411,37,0,base;pedestal;stand
43
+ 42,0.0022,1440,162,0,box
44
+ 43,0.0022,800,77,0,column;pillar
45
+ 44,0.0020,2650,298,0,signboard;sign
46
+ 45,0.0019,549,46,0,chest;of;drawers;chest;bureau;dresser
47
+ 46,0.0019,367,36,0,counter
48
+ 47,0.0018,311,30,1,sand
49
+ 48,0.0018,1181,122,0,sink
50
+ 49,0.0018,287,23,1,skyscraper
51
+ 50,0.0018,468,38,0,fireplace;hearth;open;fireplace
52
+ 51,0.0018,402,43,0,refrigerator;icebox
53
+ 52,0.0018,130,12,1,grandstand;covered;stand
54
+ 53,0.0018,561,64,1,path
55
+ 54,0.0017,880,102,0,stairs;steps
56
+ 55,0.0017,86,12,1,runway
57
+ 56,0.0017,172,11,0,case;display;case;showcase;vitrine
58
+ 57,0.0017,198,18,0,pool;table;billiard;table;snooker;table
59
+ 58,0.0017,930,109,0,pillow
60
+ 59,0.0015,139,18,0,screen;door;screen
61
+ 60,0.0015,564,52,1,stairway;staircase
62
+ 61,0.0015,320,26,1,river
63
+ 62,0.0015,261,29,1,bridge;span
64
+ 63,0.0014,275,22,0,bookcase
65
+ 64,0.0014,335,60,0,blind;screen
66
+ 65,0.0014,792,75,0,coffee;table;cocktail;table
67
+ 66,0.0014,395,49,0,toilet;can;commode;crapper;pot;potty;stool;throne
68
+ 67,0.0014,1309,138,0,flower
69
+ 68,0.0013,1112,113,0,book
70
+ 69,0.0013,266,27,1,hill
71
+ 70,0.0013,659,66,0,bench
72
+ 71,0.0012,331,31,0,countertop
73
+ 72,0.0012,531,56,0,stove;kitchen;stove;range;kitchen;range;cooking;stove
74
+ 73,0.0012,369,36,0,palm;palm;tree
75
+ 74,0.0012,144,9,0,kitchen;island
76
+ 75,0.0011,265,29,0,computer;computing;machine;computing;device;data;processor;electronic;computer;information;processing;system
77
+ 76,0.0010,324,33,0,swivel;chair
78
+ 77,0.0009,304,27,0,boat
79
+ 78,0.0009,170,20,0,bar
80
+ 79,0.0009,68,6,0,arcade;machine
81
+ 80,0.0009,65,8,1,hovel;hut;hutch;shack;shanty
82
+ 81,0.0009,248,25,0,bus;autobus;coach;charabanc;double-decker;jitney;motorbus;motorcoach;omnibus;passenger;vehicle
83
+ 82,0.0008,492,49,0,towel
84
+ 83,0.0008,2510,269,0,light;light;source
85
+ 84,0.0008,440,39,0,truck;motortruck
86
+ 85,0.0008,147,18,1,tower
87
+ 86,0.0008,583,56,0,chandelier;pendant;pendent
88
+ 87,0.0007,533,61,0,awning;sunshade;sunblind
89
+ 88,0.0007,1989,239,0,streetlight;street;lamp
90
+ 89,0.0007,71,5,0,booth;cubicle;stall;kiosk
91
+ 90,0.0007,618,53,0,television;television;receiver;television;set;tv;tv;set;idiot;box;boob;tube;telly;goggle;box
92
+ 91,0.0007,135,12,0,airplane;aeroplane;plane
93
+ 92,0.0007,83,5,1,dirt;track
94
+ 93,0.0007,178,17,0,apparel;wearing;apparel;dress;clothes
95
+ 94,0.0006,1003,104,0,pole
96
+ 95,0.0006,182,12,1,land;ground;soil
97
+ 96,0.0006,452,50,0,bannister;banister;balustrade;balusters;handrail
98
+ 97,0.0006,42,6,1,escalator;moving;staircase;moving;stairway
99
+ 98,0.0006,307,31,0,ottoman;pouf;pouffe;puff;hassock
100
+ 99,0.0006,965,114,0,bottle
101
+ 100,0.0006,117,13,0,buffet;counter;sideboard
102
+ 101,0.0006,354,35,0,poster;posting;placard;notice;bill;card
103
+ 102,0.0006,108,9,1,stage
104
+ 103,0.0006,557,55,0,van
105
+ 104,0.0006,52,4,0,ship
106
+ 105,0.0005,99,5,0,fountain
107
+ 106,0.0005,57,4,1,conveyer;belt;conveyor;belt;conveyer;conveyor;transporter
108
+ 107,0.0005,292,31,0,canopy
109
+ 108,0.0005,77,9,0,washer;automatic;washer;washing;machine
110
+ 109,0.0005,340,38,0,plaything;toy
111
+ 110,0.0005,66,3,1,swimming;pool;swimming;bath;natatorium
112
+ 111,0.0005,465,49,0,stool
113
+ 112,0.0005,50,4,0,barrel;cask
114
+ 113,0.0005,622,75,0,basket;handbasket
115
+ 114,0.0005,80,9,1,waterfall;falls
116
+ 115,0.0005,59,3,0,tent;collapsible;shelter
117
+ 116,0.0005,531,72,0,bag
118
+ 117,0.0005,282,30,0,minibike;motorbike
119
+ 118,0.0005,73,7,0,cradle
120
+ 119,0.0005,435,44,0,oven
121
+ 120,0.0005,136,25,0,ball
122
+ 121,0.0005,116,24,0,food;solid;food
123
+ 122,0.0004,266,31,0,step;stair
124
+ 123,0.0004,58,12,0,tank;storage;tank
125
+ 124,0.0004,418,83,0,trade;name;brand;name;brand;marque
126
+ 125,0.0004,319,43,0,microwave;microwave;oven
127
+ 126,0.0004,1193,139,0,pot;flowerpot
128
+ 127,0.0004,97,23,0,animal;animate;being;beast;brute;creature;fauna
129
+ 128,0.0004,347,36,0,bicycle;bike;wheel;cycle
130
+ 129,0.0004,52,5,1,lake
131
+ 130,0.0004,246,22,0,dishwasher;dish;washer;dishwashing;machine
132
+ 131,0.0004,108,13,0,screen;silver;screen;projection;screen
133
+ 132,0.0004,201,30,0,blanket;cover
134
+ 133,0.0004,285,21,0,sculpture
135
+ 134,0.0004,268,27,0,hood;exhaust;hood
136
+ 135,0.0003,1020,108,0,sconce
137
+ 136,0.0003,1282,122,0,vase
138
+ 137,0.0003,528,65,0,traffic;light;traffic;signal;stoplight
139
+ 138,0.0003,453,57,0,tray
140
+ 139,0.0003,671,100,0,ashcan;trash;can;garbage;can;wastebin;ash;bin;ash-bin;ashbin;dustbin;trash;barrel;trash;bin
141
+ 140,0.0003,397,44,0,fan
142
+ 141,0.0003,92,8,1,pier;wharf;wharfage;dock
143
+ 142,0.0003,228,18,0,crt;screen
144
+ 143,0.0003,570,59,0,plate
145
+ 144,0.0003,217,22,0,monitor;monitoring;device
146
+ 145,0.0003,206,19,0,bulletin;board;notice;board
147
+ 146,0.0003,130,14,0,shower
148
+ 147,0.0003,178,28,0,radiator
149
+ 148,0.0002,504,57,0,glass;drinking;glass
150
+ 149,0.0002,775,96,0,clock
151
+ 150,0.0002,421,56,0,flag
models/ade20k/resnet.py ADDED
@@ -0,0 +1,181 @@
1
+ """Modified from https://github.com/CSAILVision/semantic-segmentation-pytorch"""
2
+
3
+ import math
4
+
5
+ import torch.nn as nn
6
+ from torch.nn import BatchNorm2d
7
+
8
+ from .utils import load_url
9
+
10
+ __all__ = ['ResNet', 'resnet50']
11
+
12
+
13
+ model_urls = {
14
+ 'resnet50': 'http://sceneparsing.csail.mit.edu/model/pretrained_resnet/resnet50-imagenet.pth',
15
+ }
16
+
17
+
18
+ def conv3x3(in_planes, out_planes, stride=1):
19
+ "3x3 convolution with padding"
20
+ return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
21
+ padding=1, bias=False)
22
+
23
+
24
+ class BasicBlock(nn.Module):
25
+ expansion = 1
26
+
27
+ def __init__(self, inplanes, planes, stride=1, downsample=None):
28
+ super(BasicBlock, self).__init__()
29
+ self.conv1 = conv3x3(inplanes, planes, stride)
30
+ self.bn1 = BatchNorm2d(planes)
31
+ self.relu = nn.ReLU(inplace=True)
32
+ self.conv2 = conv3x3(planes, planes)
33
+ self.bn2 = BatchNorm2d(planes)
34
+ self.downsample = downsample
35
+ self.stride = stride
36
+
37
+ def forward(self, x):
38
+ residual = x
39
+
40
+ out = self.conv1(x)
41
+ out = self.bn1(out)
42
+ out = self.relu(out)
43
+
44
+ out = self.conv2(out)
45
+ out = self.bn2(out)
46
+
47
+ if self.downsample is not None:
48
+ residual = self.downsample(x)
49
+
50
+ out += residual
51
+ out = self.relu(out)
52
+
53
+ return out
54
+
55
+
56
+ class Bottleneck(nn.Module):
57
+ expansion = 4
58
+
59
+ def __init__(self, inplanes, planes, stride=1, downsample=None):
60
+ super(Bottleneck, self).__init__()
61
+ self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
62
+ self.bn1 = BatchNorm2d(planes)
63
+ self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride,
64
+ padding=1, bias=False)
65
+ self.bn2 = BatchNorm2d(planes)
66
+ self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False)
67
+ self.bn3 = BatchNorm2d(planes * 4)
68
+ self.relu = nn.ReLU(inplace=True)
69
+ self.downsample = downsample
70
+ self.stride = stride
71
+
72
+ def forward(self, x):
73
+ residual = x
74
+
75
+ out = self.conv1(x)
76
+ out = self.bn1(out)
77
+ out = self.relu(out)
78
+
79
+ out = self.conv2(out)
80
+ out = self.bn2(out)
81
+ out = self.relu(out)
82
+
83
+ out = self.conv3(out)
84
+ out = self.bn3(out)
85
+
86
+ if self.downsample is not None:
87
+ residual = self.downsample(x)
88
+
89
+ out += residual
90
+ out = self.relu(out)
91
+
92
+ return out
93
+
94
+
95
+ class ResNet(nn.Module):
96
+
97
+ def __init__(self, block, layers, num_classes=1000):
98
+ self.inplanes = 128
99
+ super(ResNet, self).__init__()
100
+ self.conv1 = conv3x3(3, 64, stride=2)
101
+ self.bn1 = BatchNorm2d(64)
102
+ self.relu1 = nn.ReLU(inplace=True)
103
+ self.conv2 = conv3x3(64, 64)
104
+ self.bn2 = BatchNorm2d(64)
105
+ self.relu2 = nn.ReLU(inplace=True)
106
+ self.conv3 = conv3x3(64, 128)
107
+ self.bn3 = BatchNorm2d(128)
108
+ self.relu3 = nn.ReLU(inplace=True)
109
+ self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
110
+
111
+ self.layer1 = self._make_layer(block, 64, layers[0])
112
+ self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
113
+ self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
114
+ self.layer4 = self._make_layer(block, 512, layers[3], stride=2)
115
+ self.avgpool = nn.AvgPool2d(7, stride=1)
116
+ self.fc = nn.Linear(512 * block.expansion, num_classes)
117
+
118
+ for m in self.modules():
119
+ if isinstance(m, nn.Conv2d):
120
+ n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
121
+ m.weight.data.normal_(0, math.sqrt(2. / n))
122
+ elif isinstance(m, BatchNorm2d):
123
+ m.weight.data.fill_(1)
124
+ m.bias.data.zero_()
125
+
126
+ def _make_layer(self, block, planes, blocks, stride=1):
127
+ downsample = None
128
+ if stride != 1 or self.inplanes != planes * block.expansion:
129
+ downsample = nn.Sequential(
130
+ nn.Conv2d(self.inplanes, planes * block.expansion,
131
+ kernel_size=1, stride=stride, bias=False),
132
+ BatchNorm2d(planes * block.expansion),
133
+ )
134
+
135
+ layers = []
136
+ layers.append(block(self.inplanes, planes, stride, downsample))
137
+ self.inplanes = planes * block.expansion
138
+ for i in range(1, blocks):
139
+ layers.append(block(self.inplanes, planes))
140
+
141
+ return nn.Sequential(*layers)
142
+
143
+ def forward(self, x):
144
+ x = self.relu1(self.bn1(self.conv1(x)))
145
+ x = self.relu2(self.bn2(self.conv2(x)))
146
+ x = self.relu3(self.bn3(self.conv3(x)))
147
+ x = self.maxpool(x)
148
+
149
+ x = self.layer1(x)
150
+ x = self.layer2(x)
151
+ x = self.layer3(x)
152
+ x = self.layer4(x)
153
+
154
+ x = self.avgpool(x)
155
+ x = x.view(x.size(0), -1)
156
+ x = self.fc(x)
157
+
158
+ return x
159
+
160
+
161
+ def resnet50(pretrained=False, **kwargs):
162
+ """Constructs a ResNet-50 model.
163
+
164
+ Args:
165
+ pretrained (bool): If True, returns a model pre-trained on ImageNet
166
+ """
167
+ model = ResNet(Bottleneck, [3, 4, 6, 3], **kwargs)
168
+ if pretrained:
169
+ model.load_state_dict(load_url(model_urls['resnet50']), strict=False)
170
+ return model
171
+
172
+
173
+ def resnet18(pretrained=False, **kwargs):
174
+ """Constructs a ResNet-18 model.
175
+ Args:
176
+ pretrained (bool): If True, returns a model pre-trained on ImageNet
177
+ """
178
+ model = ResNet(BasicBlock, [2, 2, 2, 2], **kwargs)
179
+ if pretrained:
180
+ model.load_state_dict(load_url(model_urls['resnet18']))
181
+ return model
models/ade20k/segm_lib/.DS_Store ADDED
Binary file (6.15 kB). View file
 
models/ade20k/segm_lib/nn/.DS_Store ADDED
Binary file (6.15 kB). View file
 
models/ade20k/segm_lib/nn/__init__.py ADDED
@@ -0,0 +1,2 @@
1
+ from .modules import *
2
+ from .parallel import UserScatteredDataParallel, user_scattered_collate, async_copy_to
models/ade20k/segm_lib/nn/modules/__init__.py ADDED
@@ -0,0 +1,12 @@
1
+ # -*- coding: utf-8 -*-
2
+ # File : __init__.py
3
+ # Author : Jiayuan Mao
4
+ # Email : [email protected]
5
+ # Date : 27/01/2018
6
+ #
7
+ # This file is part of Synchronized-BatchNorm-PyTorch.
8
+ # https://github.com/vacancy/Synchronized-BatchNorm-PyTorch
9
+ # Distributed under MIT License.
10
+
11
+ from .batchnorm import SynchronizedBatchNorm1d, SynchronizedBatchNorm2d, SynchronizedBatchNorm3d
12
+ from .replicate import DataParallelWithCallback, patch_replication_callback
models/ade20k/segm_lib/nn/modules/batchnorm.py ADDED
@@ -0,0 +1,329 @@
1
+ # -*- coding: utf-8 -*-
2
+ # File : batchnorm.py
3
+ # Author : Jiayuan Mao
4
+ # Email : [email protected]
5
+ # Date : 27/01/2018
6
+ #
7
+ # This file is part of Synchronized-BatchNorm-PyTorch.
8
+ # https://github.com/vacancy/Synchronized-BatchNorm-PyTorch
9
+ # Distributed under MIT License.
10
+
11
+ import collections
12
+
13
+ import torch
14
+ import torch.nn.functional as F
15
+
16
+ from torch.nn.modules.batchnorm import _BatchNorm
17
+ from torch.nn.parallel._functions import ReduceAddCoalesced, Broadcast
18
+
19
+ from .comm import SyncMaster
20
+
21
+ __all__ = ['SynchronizedBatchNorm1d', 'SynchronizedBatchNorm2d', 'SynchronizedBatchNorm3d']
22
+
23
+
24
+ def _sum_ft(tensor):
25
+ """sum over the first and last dimention"""
26
+ return tensor.sum(dim=0).sum(dim=-1)
27
+
28
+
29
+ def _unsqueeze_ft(tensor):
30
+ """add new dementions at the front and the tail"""
31
+ return tensor.unsqueeze(0).unsqueeze(-1)
32
+
33
+
34
+ _ChildMessage = collections.namedtuple('_ChildMessage', ['sum', 'ssum', 'sum_size'])
35
+ _MasterMessage = collections.namedtuple('_MasterMessage', ['sum', 'inv_std'])
36
+
37
+
38
+ class _SynchronizedBatchNorm(_BatchNorm):
39
+ def __init__(self, num_features, eps=1e-5, momentum=0.001, affine=True):
40
+ super(_SynchronizedBatchNorm, self).__init__(num_features, eps=eps, momentum=momentum, affine=affine)
41
+
42
+ self._sync_master = SyncMaster(self._data_parallel_master)
43
+
44
+ self._is_parallel = False
45
+ self._parallel_id = None
46
+ self._slave_pipe = None
47
+
48
+ # custom batch norm statistics
49
+ self._moving_average_fraction = 1. - momentum
50
+ self.register_buffer('_tmp_running_mean', torch.zeros(self.num_features))
51
+ self.register_buffer('_tmp_running_var', torch.ones(self.num_features))
52
+ self.register_buffer('_running_iter', torch.ones(1))
53
+ self._tmp_running_mean = self.running_mean.clone() * self._running_iter
54
+ self._tmp_running_var = self.running_var.clone() * self._running_iter
55
+
56
+ def forward(self, input):
57
+ # If it is not parallel computation or is in evaluation mode, use PyTorch's implementation.
58
+ if not (self._is_parallel and self.training):
59
+ return F.batch_norm(
60
+ input, self.running_mean, self.running_var, self.weight, self.bias,
61
+ self.training, self.momentum, self.eps)
62
+
63
+ # Resize the input to (B, C, -1).
64
+ input_shape = input.size()
65
+ input = input.view(input.size(0), self.num_features, -1)
66
+
67
+ # Compute the sum and square-sum.
68
+ sum_size = input.size(0) * input.size(2)
69
+ input_sum = _sum_ft(input)
70
+ input_ssum = _sum_ft(input ** 2)
71
+
72
+ # Reduce-and-broadcast the statistics.
73
+ if self._parallel_id == 0:
74
+ mean, inv_std = self._sync_master.run_master(_ChildMessage(input_sum, input_ssum, sum_size))
75
+ else:
76
+ mean, inv_std = self._slave_pipe.run_slave(_ChildMessage(input_sum, input_ssum, sum_size))
77
+
78
+ # Compute the output.
79
+ if self.affine:
80
+ # MJY:: Fuse the multiplication for speed.
81
+ output = (input - _unsqueeze_ft(mean)) * _unsqueeze_ft(inv_std * self.weight) + _unsqueeze_ft(self.bias)
82
+ else:
83
+ output = (input - _unsqueeze_ft(mean)) * _unsqueeze_ft(inv_std)
84
+
85
+ # Reshape it.
86
+ return output.view(input_shape)
87
+
88
+ def __data_parallel_replicate__(self, ctx, copy_id):
89
+ self._is_parallel = True
90
+ self._parallel_id = copy_id
91
+
92
+ # parallel_id == 0 means master device.
93
+ if self._parallel_id == 0:
94
+ ctx.sync_master = self._sync_master
95
+ else:
96
+ self._slave_pipe = ctx.sync_master.register_slave(copy_id)
97
+
98
+ def _data_parallel_master(self, intermediates):
99
+ """Reduce the sum and square-sum, compute the statistics, and broadcast it."""
100
+ intermediates = sorted(intermediates, key=lambda i: i[1].sum.get_device())
101
+
102
+ to_reduce = [i[1][:2] for i in intermediates]
103
+ to_reduce = [j for i in to_reduce for j in i] # flatten
104
+ target_gpus = [i[1].sum.get_device() for i in intermediates]
105
+
106
+ sum_size = sum([i[1].sum_size for i in intermediates])
107
+ sum_, ssum = ReduceAddCoalesced.apply(target_gpus[0], 2, *to_reduce)
108
+
109
+ mean, inv_std = self._compute_mean_std(sum_, ssum, sum_size)
110
+
111
+ broadcasted = Broadcast.apply(target_gpus, mean, inv_std)
112
+
113
+ outputs = []
114
+ for i, rec in enumerate(intermediates):
115
+ outputs.append((rec[0], _MasterMessage(*broadcasted[i*2:i*2+2])))
116
+
117
+ return outputs
118
+
119
+ def _add_weighted(self, dest, delta, alpha=1, beta=1, bias=0):
120
+ """return *dest* by `dest := dest*alpha + delta*beta + bias`"""
121
+ return dest * alpha + delta * beta + bias
122
+
123
+ def _compute_mean_std(self, sum_, ssum, size):
124
+ """Compute the mean and standard-deviation with sum and square-sum. This method
125
+ also maintains the moving average on the master device."""
126
+ assert size > 1, 'BatchNorm computes unbiased standard-deviation, which requires size > 1.'
127
+ mean = sum_ / size
128
+ sumvar = ssum - sum_ * mean
129
+ unbias_var = sumvar / (size - 1)
130
+ bias_var = sumvar / size
131
+
132
+ self._tmp_running_mean = self._add_weighted(self._tmp_running_mean, mean.data, alpha=self._moving_average_fraction)
133
+ self._tmp_running_var = self._add_weighted(self._tmp_running_var, unbias_var.data, alpha=self._moving_average_fraction)
134
+ self._running_iter = self._add_weighted(self._running_iter, 1, alpha=self._moving_average_fraction)
135
+
136
+ self.running_mean = self._tmp_running_mean / self._running_iter
137
+ self.running_var = self._tmp_running_var / self._running_iter
138
+
139
+ return mean, bias_var.clamp(self.eps) ** -0.5
140
+
141
+
142
+ class SynchronizedBatchNorm1d(_SynchronizedBatchNorm):
143
+ r"""Applies Synchronized Batch Normalization over a 2d or 3d input that is seen as a
144
+ mini-batch.
145
+
146
+ .. math::
147
+
148
+ y = \frac{x - mean[x]}{ \sqrt{Var[x] + \epsilon}} * gamma + beta
149
+
150
+ This module differs from the built-in PyTorch BatchNorm1d as the mean and
151
+ standard-deviation are reduced across all devices during training.
152
+
153
+ For example, when one uses `nn.DataParallel` to wrap the network during
154
+ training, PyTorch's implementation normalizes the tensor on each device using
155
+ the statistics only on that device, which accelerates the computation and
156
+ is also easy to implement, but the statistics might be inaccurate.
157
+ Instead, in this synchronized version, the statistics will be computed
158
+ over all training samples distributed on multiple devices.
159
+
160
+ Note that, in the one-GPU or CPU-only case, this module behaves exactly the same
161
+ as the built-in PyTorch implementation.
162
+
163
+ The mean and standard-deviation are calculated per-dimension over
164
+ the mini-batches and gamma and beta are learnable parameter vectors
165
+ of size C (where C is the input size).
166
+
167
+ During training, this layer keeps a running estimate of its computed mean
168
+ and variance. The running estimates are kept with a default momentum of 0.001.
169
+
170
+ During evaluation, this running mean/variance is used for normalization.
171
+
172
+ Because the BatchNorm is done over the `C` dimension, computing statistics
173
+ on `(N, L)` slices, it's common terminology to call this Temporal BatchNorm
174
+
175
+ Args:
176
+ num_features: num_features from an expected input of size
177
+ `batch_size x num_features [x width]`
178
+ eps: a value added to the denominator for numerical stability.
179
+ Default: 1e-5
180
+ momentum: the value used for the running_mean and running_var
181
+ computation. Default: 0.001
182
+ affine: a boolean value that when set to ``True``, gives the layer learnable
183
+ affine parameters. Default: ``True``
184
+
185
+ Shape:
186
+ - Input: :math:`(N, C)` or :math:`(N, C, L)`
187
+ - Output: :math:`(N, C)` or :math:`(N, C, L)` (same shape as input)
188
+
189
+ Examples:
190
+ >>> # With Learnable Parameters
191
+ >>> m = SynchronizedBatchNorm1d(100)
192
+ >>> # Without Learnable Parameters
193
+ >>> m = SynchronizedBatchNorm1d(100, affine=False)
194
+ >>> input = torch.autograd.Variable(torch.randn(20, 100))
195
+ >>> output = m(input)
196
+ """
197
+
198
+ def _check_input_dim(self, input):
199
+ if input.dim() != 2 and input.dim() != 3:
200
+ raise ValueError('expected 2D or 3D input (got {}D input)'
201
+ .format(input.dim()))
202
+ super(SynchronizedBatchNorm1d, self)._check_input_dim(input)
203
+
204
+
205
+ class SynchronizedBatchNorm2d(_SynchronizedBatchNorm):
206
+ r"""Applies Batch Normalization over a 4d input that is seen as a mini-batch
207
+ of 3d inputs
208
+
209
+ .. math::
210
+
211
+ y = \frac{x - mean[x]}{ \sqrt{Var[x] + \epsilon}} * gamma + beta
212
+
213
+ This module differs from the built-in PyTorch BatchNorm2d as the mean and
214
+ standard-deviation are reduced across all devices during training.
215
+
216
+ For example, when one uses `nn.DataParallel` to wrap the network during
217
+ training, PyTorch's implementation normalizes the tensor on each device using
218
+ the statistics only on that device, which accelerates the computation and
219
+ is also easy to implement, but the statistics might be inaccurate.
220
+ Instead, in this synchronized version, the statistics will be computed
221
+ over all training samples distributed on multiple devices.
222
+
223
+ Note that, in the one-GPU or CPU-only case, this module behaves exactly the same
224
+ as the built-in PyTorch implementation.
225
+
226
+ The mean and standard-deviation are calculated per-dimension over
227
+ the mini-batches and gamma and beta are learnable parameter vectors
228
+ of size C (where C is the input size).
229
+
230
+ During training, this layer keeps a running estimate of its computed mean
231
+ and variance. The running estimates are kept with a default momentum of 0.001.
232
+
233
+ During evaluation, this running mean/variance is used for normalization.
234
+
235
+ Because the BatchNorm is done over the `C` dimension, computing statistics
236
+ on `(N, H, W)` slices, it's common terminology to call this Spatial BatchNorm
237
+
238
+ Args:
239
+ num_features: num_features from an expected input of
240
+ size batch_size x num_features x height x width
241
+ eps: a value added to the denominator for numerical stability.
242
+ Default: 1e-5
243
+ momentum: the value used for the running_mean and running_var
244
+ computation. Default: 0.001
245
+ affine: a boolean value that when set to ``True``, gives the layer learnable
246
+ affine parameters. Default: ``True``
247
+
248
+ Shape:
249
+ - Input: :math:`(N, C, H, W)`
250
+ - Output: :math:`(N, C, H, W)` (same shape as input)
251
+
252
+ Examples:
253
+ >>> # With Learnable Parameters
254
+ >>> m = SynchronizedBatchNorm2d(100)
255
+ >>> # Without Learnable Parameters
256
+ >>> m = SynchronizedBatchNorm2d(100, affine=False)
257
+ >>> input = torch.autograd.Variable(torch.randn(20, 100, 35, 45))
258
+ >>> output = m(input)
259
+ """
260
+
261
+ def _check_input_dim(self, input):
262
+ if input.dim() != 4:
263
+ raise ValueError('expected 4D input (got {}D input)'
264
+ .format(input.dim()))
265
+ super(SynchronizedBatchNorm2d, self)._check_input_dim(input)
266
+
267
+
268
+ class SynchronizedBatchNorm3d(_SynchronizedBatchNorm):
269
+ r"""Applies Batch Normalization over a 5d input that is seen as a mini-batch
270
+ of 4d inputs
271
+
272
+ .. math::
273
+
274
+ y = \frac{x - mean[x]}{ \sqrt{Var[x] + \epsilon}} * gamma + beta
275
+
276
+ This module differs from the built-in PyTorch BatchNorm3d as the mean and
277
+ standard-deviation are reduced across all devices during training.
278
+
279
+ For example, when one uses `nn.DataParallel` to wrap the network during
280
+ training, PyTorch's implementation normalizes the tensor on each device using
281
+ the statistics only on that device, which accelerates the computation and
282
+ is also easy to implement, but the statistics might be inaccurate.
283
+ Instead, in this synchronized version, the statistics will be computed
284
+ over all training samples distributed on multiple devices.
285
+
286
+ Note that, in the one-GPU or CPU-only case, this module behaves exactly the same
287
+ as the built-in PyTorch implementation.
288
+
289
+ The mean and standard-deviation are calculated per-dimension over
290
+ the mini-batches and gamma and beta are learnable parameter vectors
291
+ of size C (where C is the input size).
292
+
293
+ During training, this layer keeps a running estimate of its computed mean
294
+ and variance. The running estimates are kept with a default momentum of 0.001.
295
+
296
+ During evaluation, this running mean/variance is used for normalization.
297
+
298
+ Because the BatchNorm is done over the `C` dimension, computing statistics
299
+ on `(N, D, H, W)` slices, it's common terminology to call this Volumetric BatchNorm
300
+ or Spatio-temporal BatchNorm
301
+
302
+ Args:
303
+ num_features: num_features from an expected input of
304
+ size batch_size x num_features x depth x height x width
305
+ eps: a value added to the denominator for numerical stability.
306
+ Default: 1e-5
307
+ momentum: the value used for the running_mean and running_var
308
+ computation. Default: 0.001
309
+ affine: a boolean value that when set to ``True``, gives the layer learnable
310
+ affine parameters. Default: ``True``
311
+
312
+ Shape:
313
+ - Input: :math:`(N, C, D, H, W)`
314
+ - Output: :math:`(N, C, D, H, W)` (same shape as input)
315
+
316
+ Examples:
317
+ >>> # With Learnable Parameters
318
+ >>> m = SynchronizedBatchNorm3d(100)
319
+ >>> # Without Learnable Parameters
320
+ >>> m = SynchronizedBatchNorm3d(100, affine=False)
321
+ >>> input = torch.autograd.Variable(torch.randn(20, 100, 35, 45, 10))
322
+ >>> output = m(input)
323
+ """
324
+
325
+ def _check_input_dim(self, input):
326
+ if input.dim() != 5:
327
+ raise ValueError('expected 5D input (got {}D input)'
328
+ .format(input.dim()))
329
+ super(SynchronizedBatchNorm3d, self)._check_input_dim(input)
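
The statistics above are gathered with a one-pass trick: each replica ships only its per-channel sum and square-sum, and `_compute_mean_std` recovers the mean and (biased) variance from those via sumvar = ssum - sum * mean. A minimal sketch of that computation, assuming the input has already been viewed as `(B, C, -1)` as in `forward` (illustrative only; the variable names mirror the file above):

```python
# Recompute the per-channel statistics the way _sum_ft / _compute_mean_std do.
import torch

x = torch.randn(4, 16, 32 * 32, dtype=torch.float64)   # (B, C, L), already flattened
sum_ = x.sum(dim=0).sum(dim=-1)                         # _sum_ft: reduce batch and spatial dims
ssum = (x ** 2).sum(dim=0).sum(dim=-1)
size = x.size(0) * x.size(2)

mean = sum_ / size
sumvar = ssum - sum_ * mean              # sum of squared deviations, obtained in one pass
bias_var = sumvar / size                 # biased variance, used for normalization
inv_std = bias_var.clamp(min=1e-5) ** -0.5

# Matches the per-channel moments taken directly over the (B, L) slices.
per_channel = x.transpose(0, 1).reshape(16, -1)
assert torch.allclose(mean, per_channel.mean(dim=1))
assert torch.allclose(bias_var, per_channel.var(dim=1, unbiased=False))
```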
models/ade20k/segm_lib/nn/modules/comm.py ADDED
@@ -0,0 +1,131 @@
1
+ # -*- coding: utf-8 -*-
2
+ # File : comm.py
3
+ # Author : Jiayuan Mao
4
+ # Email : [email protected]
5
+ # Date : 27/01/2018
6
+ #
7
+ # This file is part of Synchronized-BatchNorm-PyTorch.
8
+ # https://github.com/vacancy/Synchronized-BatchNorm-PyTorch
9
+ # Distributed under MIT License.
10
+
11
+ import queue
12
+ import collections
13
+ import threading
14
+
15
+ __all__ = ['FutureResult', 'SlavePipe', 'SyncMaster']
16
+
17
+
18
+ class FutureResult(object):
19
+ """A thread-safe future implementation. Used only as one-to-one pipe."""
20
+
21
+ def __init__(self):
22
+ self._result = None
23
+ self._lock = threading.Lock()
24
+ self._cond = threading.Condition(self._lock)
25
+
26
+ def put(self, result):
27
+ with self._lock:
28
+ assert self._result is None, 'Previous result hasn\'t been fetched.'
29
+ self._result = result
30
+ self._cond.notify()
31
+
32
+ def get(self):
33
+ with self._lock:
34
+ if self._result is None:
35
+ self._cond.wait()
36
+
37
+ res = self._result
38
+ self._result = None
39
+ return res
40
+
41
+
42
+ _MasterRegistry = collections.namedtuple('MasterRegistry', ['result'])
43
+ _SlavePipeBase = collections.namedtuple('_SlavePipeBase', ['identifier', 'queue', 'result'])
44
+
45
+
46
+ class SlavePipe(_SlavePipeBase):
47
+ """Pipe for master-slave communication."""
48
+
49
+ def run_slave(self, msg):
50
+ self.queue.put((self.identifier, msg))
51
+ ret = self.result.get()
52
+ self.queue.put(True)
53
+ return ret
54
+
55
+
56
+ class SyncMaster(object):
57
+ """An abstract `SyncMaster` object.
58
+
59
+ - During the replication, as the data parallel will trigger a callback on each module, all slave devices should
60
+ call `register_slave(id)` and obtain a `SlavePipe` to communicate with the master.
61
+ - During the forward pass, the master device invokes `run_master`; all messages from slave devices will be collected,
62
+ and passed to a registered callback.
63
+ - After receiving the messages, the master device should gather the information and determine the message to be passed
64
+ back to each slave device.
65
+ """
66
+
67
+ def __init__(self, master_callback):
68
+ """
69
+
70
+ Args:
71
+ master_callback: a callback to be invoked after having collected messages from slave devices.
72
+ """
73
+ self._master_callback = master_callback
74
+ self._queue = queue.Queue()
75
+ self._registry = collections.OrderedDict()
76
+ self._activated = False
77
+
78
+ def register_slave(self, identifier):
79
+ """
80
+ Register a slave device.
81
+
82
+ Args:
83
+ identifier: an identifier, usually is the device id.
84
+
85
+ Returns: a `SlavePipe` object which can be used to communicate with the master device.
86
+
87
+ """
88
+ if self._activated:
89
+ assert self._queue.empty(), 'Queue is not clean before next initialization.'
90
+ self._activated = False
91
+ self._registry.clear()
92
+ future = FutureResult()
93
+ self._registry[identifier] = _MasterRegistry(future)
94
+ return SlavePipe(identifier, self._queue, future)
95
+
96
+ def run_master(self, master_msg):
97
+ """
98
+ Main entry for the master device in each forward pass.
99
+ The messages are first collected from each device (including the master device), and then
100
+ a callback will be invoked to compute the message to be sent back to each device
101
+ (including the master device).
102
+
103
+ Args:
104
+ master_msg: the message that the master wants to send to itself. This will be placed as the first
105
+ message when calling `master_callback`. For detailed usage, see `_SynchronizedBatchNorm` for an example.
106
+
107
+ Returns: the message to be sent back to the master device.
108
+
109
+ """
110
+ self._activated = True
111
+
112
+ intermediates = [(0, master_msg)]
113
+ for i in range(self.nr_slaves):
114
+ intermediates.append(self._queue.get())
115
+
116
+ results = self._master_callback(intermediates)
117
+ assert results[0][0] == 0, 'The first result should belong to the master.'
118
+
119
+ for i, res in results:
120
+ if i == 0:
121
+ continue
122
+ self._registry[i].result.put(res)
123
+
124
+ for i in range(self.nr_slaves):
125
+ assert self._queue.get() is True
126
+
127
+ return results[0][1]
128
+
129
+ @property
130
+ def nr_slaves(self):
131
+ return len(self._registry)
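
`SyncMaster` and `SlavePipe` implement a small thread-safe rendezvous: each slave pushes `(identifier, message)` onto a shared queue and blocks on its `FutureResult`, while the master collects one message per registered slave plus its own, runs the callback, and pushes each reply back through the matching future. A minimal sketch of one round trip using plain threads (the import path assumes this repo's folder layout; in `batchnorm.py` the participants are module replicas on different GPUs rather than threads):

```python
# One master and one slave exchanging a reduced value through SyncMaster.
import threading
from models.ade20k.segm_lib.nn.modules.comm import SyncMaster

def master_callback(intermediates):
    # intermediates: [(identifier, message), ...]; index 0 is the master's own message.
    total = sum(msg for _, msg in intermediates)
    # Reply to every participant with the same reduced value, keeping the order.
    return [(identifier, total) for identifier, _ in intermediates]

master = SyncMaster(master_callback)
slave_pipe = master.register_slave(identifier=1)

results = {}

def slave():
    # Blocks until the master has collected all messages and replied.
    results['slave'] = slave_pipe.run_slave(10)

t = threading.Thread(target=slave)
t.start()
results['master'] = master.run_master(32)   # collects the slave's message, runs the callback
t.join()
print(results)   # both the master and the slave receive the reduced value 42
```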
models/ade20k/segm_lib/nn/modules/replicate.py ADDED
@@ -0,0 +1,94 @@
1
+ # -*- coding: utf-8 -*-
2
+ # File : replicate.py
3
+ # Author : Jiayuan Mao
4
+ # Email : [email protected]
5
+ # Date : 27/01/2018
6
+ #
7
+ # This file is part of Synchronized-BatchNorm-PyTorch.
8
+ # https://github.com/vacancy/Synchronized-BatchNorm-PyTorch
9
+ # Distributed under MIT License.
10
+
11
+ import functools
12
+
13
+ from torch.nn.parallel.data_parallel import DataParallel
14
+
15
+ __all__ = [
16
+ 'CallbackContext',
17
+ 'execute_replication_callbacks',
18
+ 'DataParallelWithCallback',
19
+ 'patch_replication_callback'
20
+ ]
21
+
22
+
23
+ class CallbackContext(object):
24
+ pass
25
+
26
+
27
+ def execute_replication_callbacks(modules):
28
+ """
29
+ Execute an replication callback `__data_parallel_replicate__` on each module created by original replication.
30
+
31
+ The callback will be invoked with arguments `__data_parallel_replicate__(ctx, copy_id)`
32
+
33
+ Note that, as all modules are isomorphism, we assign each sub-module with a context
34
+ (shared among multiple copies of this module on different devices).
35
+ Through this context, different copies can share some information.
36
+
37
+ We guarantee that the callback on the master copy (the first copy) will be called ahead of calling the callback
38
+ of any slave copies.
39
+ """
40
+ master_copy = modules[0]
41
+ nr_modules = len(list(master_copy.modules()))
42
+ ctxs = [CallbackContext() for _ in range(nr_modules)]
43
+
44
+ for i, module in enumerate(modules):
45
+ for j, m in enumerate(module.modules()):
46
+ if hasattr(m, '__data_parallel_replicate__'):
47
+ m.__data_parallel_replicate__(ctxs[j], i)
48
+
49
+
50
+ class DataParallelWithCallback(DataParallel):
51
+ """
52
+ Data Parallel with a replication callback.
53
+
54
+ An replication callback `__data_parallel_replicate__` of each module will be invoked after being created by
55
+ original `replicate` function.
56
+ The callback will be invoked with arguments `__data_parallel_replicate__(ctx, copy_id)`
57
+
58
+ Examples:
59
+ > sync_bn = SynchronizedBatchNorm1d(10, eps=1e-5, affine=False)
60
+ > sync_bn = DataParallelWithCallback(sync_bn, device_ids=[0, 1])
61
+ # sync_bn.__data_parallel_replicate__ will be invoked.
62
+ """
63
+
64
+ def replicate(self, module, device_ids):
65
+ modules = super(DataParallelWithCallback, self).replicate(module, device_ids)
66
+ execute_replication_callbacks(modules)
67
+ return modules
68
+
69
+
70
+ def patch_replication_callback(data_parallel):
71
+ """
72
+ Monkey-patch an existing `DataParallel` object. Add the replication callback.
73
+ Useful when you have customized `DataParallel` implementation.
74
+
75
+ Examples:
76
+ > sync_bn = SynchronizedBatchNorm1d(10, eps=1e-5, affine=False)
77
+ > sync_bn = DataParallel(sync_bn, device_ids=[0, 1])
78
+ > patch_replication_callback(sync_bn)
79
+ # this is equivalent to
80
+ > sync_bn = SynchronizedBatchNorm1d(10, eps=1e-5, affine=False)
81
+ > sync_bn = DataParallelWithCallback(sync_bn, device_ids=[0, 1])
82
+ """
83
+
84
+ assert isinstance(data_parallel, DataParallel)
85
+
86
+ old_replicate = data_parallel.replicate
87
+
88
+ @functools.wraps(old_replicate)
89
+ def new_replicate(module, device_ids):
90
+ modules = old_replicate(module, device_ids)
91
+ execute_replication_callbacks(modules)
92
+ return modules
93
+
94
+ data_parallel.replicate = new_replicate
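
`execute_replication_callbacks` walks the sub-modules of every replica in the same order and hands copy `i`'s `j`-th sub-module the same `CallbackContext`, calling the master copy (copy 0) first, so the master can publish state that the other copies then read; this is exactly how `_SynchronizedBatchNorm.__data_parallel_replicate__` shares its `SyncMaster`. A small sketch of that contract, using `copy.deepcopy` as a stand-in for `DataParallel.replicate` so no GPUs are needed (import path assumes this repo's layout):

```python
# Copies of a module share one CallbackContext per sub-module position; copy 0 is the master.
import copy
import torch.nn as nn
from models.ade20k.segm_lib.nn.modules.replicate import execute_replication_callbacks

class Hooked(nn.Module):
    def __data_parallel_replicate__(self, ctx, copy_id):
        if copy_id == 0:
            ctx.master = self          # the master copy publishes itself on the shared context
        else:
            self.master = ctx.master   # later copies can then reach the master copy

original = Hooked()
replicas = [original, copy.deepcopy(original)]   # stand-in for DataParallel.replicate()
execute_replication_callbacks(replicas)
assert replicas[1].master is replicas[0]
```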
models/ade20k/segm_lib/nn/modules/tests/test_numeric_batchnorm.py ADDED
@@ -0,0 +1,56 @@
1
+ # -*- coding: utf-8 -*-
2
+ # File : test_numeric_batchnorm.py
3
+ # Author : Jiayuan Mao
4
+ # Email : [email protected]
5
+ # Date : 27/01/2018
6
+ #
7
+ # This file is part of Synchronized-BatchNorm-PyTorch.
8
+
9
+ import unittest
10
+
11
+ import torch
12
+ import torch.nn as nn
13
+ from torch.autograd import Variable
14
+
15
+ from sync_batchnorm.unittest import TorchTestCase
16
+
17
+
18
+ def handy_var(a, unbias=True):
19
+ n = a.size(0)
20
+ asum = a.sum(dim=0)
21
+ as_sum = (a ** 2).sum(dim=0) # a square sum
22
+ sumvar = as_sum - asum * asum / n
23
+ if unbias:
24
+ return sumvar / (n - 1)
25
+ else:
26
+ return sumvar / n
27
+
28
+
29
+ class NumericTestCase(TorchTestCase):
30
+ def testNumericBatchNorm(self):
31
+ a = torch.rand(16, 10)
32
+ bn = nn.BatchNorm2d(10, momentum=1, eps=1e-5, affine=False)
33
+ bn.train()
34
+
35
+ a_var1 = Variable(a, requires_grad=True)
36
+ b_var1 = bn(a_var1)
37
+ loss1 = b_var1.sum()
38
+ loss1.backward()
39
+
40
+ a_var2 = Variable(a, requires_grad=True)
41
+ a_mean2 = a_var2.mean(dim=0, keepdim=True)
42
+ a_std2 = torch.sqrt(handy_var(a_var2, unbias=False).clamp(min=1e-5))
43
+ # a_std2 = torch.sqrt(a_var2.var(dim=0, keepdim=True, unbiased=False) + 1e-5)
44
+ b_var2 = (a_var2 - a_mean2) / a_std2
45
+ loss2 = b_var2.sum()
46
+ loss2.backward()
47
+
48
+ self.assertTensorClose(bn.running_mean, a.mean(dim=0))
49
+ self.assertTensorClose(bn.running_var, handy_var(a))
50
+ self.assertTensorClose(a_var1.data, a_var2.data)
51
+ self.assertTensorClose(b_var1.data, b_var2.data)
52
+ self.assertTensorClose(a_var1.grad, a_var2.grad)
53
+
54
+
55
+ if __name__ == '__main__':
56
+ unittest.main()
models/ade20k/segm_lib/nn/modules/tests/test_sync_batchnorm.py ADDED
@@ -0,0 +1,111 @@
1
+ # -*- coding: utf-8 -*-
2
+ # File : test_sync_batchnorm.py
3
+ # Author : Jiayuan Mao
4
+ # Email : [email protected]
5
+ # Date : 27/01/2018
6
+ #
7
+ # This file is part of Synchronized-BatchNorm-PyTorch.
8
+
9
+ import unittest
10
+
11
+ import torch
12
+ import torch.nn as nn
13
+ from torch.autograd import Variable
14
+
15
+ from sync_batchnorm import SynchronizedBatchNorm1d, SynchronizedBatchNorm2d, DataParallelWithCallback
16
+ from sync_batchnorm.unittest import TorchTestCase
17
+
18
+
19
+ def handy_var(a, unbias=True):
20
+ n = a.size(0)
21
+ asum = a.sum(dim=0)
22
+ as_sum = (a ** 2).sum(dim=0) # a square sum
23
+ sumvar = as_sum - asum * asum / n
24
+ if unbias:
25
+ return sumvar / (n - 1)
26
+ else:
27
+ return sumvar / n
28
+
29
+
30
+ def _find_bn(module):
31
+ for m in module.modules():
32
+ if isinstance(m, (nn.BatchNorm1d, nn.BatchNorm2d, SynchronizedBatchNorm1d, SynchronizedBatchNorm2d)):
33
+ return m
34
+
35
+
36
+ class SyncTestCase(TorchTestCase):
37
+ def _syncParameters(self, bn1, bn2):
38
+ bn1.reset_parameters()
39
+ bn2.reset_parameters()
40
+ if bn1.affine and bn2.affine:
41
+ bn2.weight.data.copy_(bn1.weight.data)
42
+ bn2.bias.data.copy_(bn1.bias.data)
43
+
44
+ def _checkBatchNormResult(self, bn1, bn2, input, is_train, cuda=False):
45
+ """Check the forward and backward for the customized batch normalization."""
46
+ bn1.train(mode=is_train)
47
+ bn2.train(mode=is_train)
48
+
49
+ if cuda:
50
+ input = input.cuda()
51
+
52
+ self._syncParameters(_find_bn(bn1), _find_bn(bn2))
53
+
54
+ input1 = Variable(input, requires_grad=True)
55
+ output1 = bn1(input1)
56
+ output1.sum().backward()
57
+ input2 = Variable(input, requires_grad=True)
58
+ output2 = bn2(input2)
59
+ output2.sum().backward()
60
+
61
+ self.assertTensorClose(input1.data, input2.data)
62
+ self.assertTensorClose(output1.data, output2.data)
63
+ self.assertTensorClose(input1.grad, input2.grad)
64
+ self.assertTensorClose(_find_bn(bn1).running_mean, _find_bn(bn2).running_mean)
65
+ self.assertTensorClose(_find_bn(bn1).running_var, _find_bn(bn2).running_var)
66
+
67
+ def testSyncBatchNormNormalTrain(self):
68
+ bn = nn.BatchNorm1d(10)
69
+ sync_bn = SynchronizedBatchNorm1d(10)
70
+
71
+ self._checkBatchNormResult(bn, sync_bn, torch.rand(16, 10), True)
72
+
73
+ def testSyncBatchNormNormalEval(self):
74
+ bn = nn.BatchNorm1d(10)
75
+ sync_bn = SynchronizedBatchNorm1d(10)
76
+
77
+ self._checkBatchNormResult(bn, sync_bn, torch.rand(16, 10), False)
78
+
79
+ def testSyncBatchNormSyncTrain(self):
80
+ bn = nn.BatchNorm1d(10, eps=1e-5, affine=False)
81
+ sync_bn = SynchronizedBatchNorm1d(10, eps=1e-5, affine=False)
82
+ sync_bn = DataParallelWithCallback(sync_bn, device_ids=[0, 1])
83
+
84
+ bn.cuda()
85
+ sync_bn.cuda()
86
+
87
+ self._checkBatchNormResult(bn, sync_bn, torch.rand(16, 10), True, cuda=True)
88
+
89
+ def testSyncBatchNormSyncEval(self):
90
+ bn = nn.BatchNorm1d(10, eps=1e-5, affine=False)
91
+ sync_bn = SynchronizedBatchNorm1d(10, eps=1e-5, affine=False)
92
+ sync_bn = DataParallelWithCallback(sync_bn, device_ids=[0, 1])
93
+
94
+ bn.cuda()
95
+ sync_bn.cuda()
96
+
97
+ self._checkBatchNormResult(bn, sync_bn, torch.rand(16, 10), False, cuda=True)
98
+
99
+ def testSyncBatchNorm2DSyncTrain(self):
100
+ bn = nn.BatchNorm2d(10)
101
+ sync_bn = SynchronizedBatchNorm2d(10)
102
+ sync_bn = DataParallelWithCallback(sync_bn, device_ids=[0, 1])
103
+
104
+ bn.cuda()
105
+ sync_bn.cuda()
106
+
107
+ self._checkBatchNormResult(bn, sync_bn, torch.rand(16, 10, 16, 16), True, cuda=True)
108
+
109
+
110
+ if __name__ == '__main__':
111
+ unittest.main()
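
Typical training usage mirrors `testSyncBatchNorm2DSyncTrain`: build the network with the synchronized layers and wrap it in `DataParallelWithCallback` so the replication callback wires each replica to the `SyncMaster`. A sketch, assuming at least two CUDA devices; the import paths follow this repo's folder layout and may need adjusting:

```python
# Sketch: batch statistics synchronized across two GPUs during training.
import torch
import torch.nn as nn

from models.ade20k.segm_lib.nn.modules.batchnorm import SynchronizedBatchNorm2d
from models.ade20k.segm_lib.nn.modules.replicate import DataParallelWithCallback

model = nn.Sequential(
    nn.Conv2d(3, 16, kernel_size=3, padding=1),
    SynchronizedBatchNorm2d(16),   # mean/var reduced over both replicas while training
    nn.ReLU(),
)
model = DataParallelWithCallback(model.cuda(), device_ids=[0, 1])
model.train()

x = torch.randn(8, 3, 32, 32).cuda()
loss = model(x).sum()
loss.backward()
```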
models/ade20k/segm_lib/nn/modules/unittest.py ADDED
@@ -0,0 +1,29 @@
1
+ # -*- coding: utf-8 -*-
2
+ # File : unittest.py
3
+ # Author : Jiayuan Mao
4
+ # Email : [email protected]
5
+ # Date : 27/01/2018
6
+ #
7
+ # This file is part of Synchronized-BatchNorm-PyTorch.
8
+ # https://github.com/vacancy/Synchronized-BatchNorm-PyTorch
9
+ # Distributed under MIT License.
10
+
11
+ import unittest
12
+
13
+ import numpy as np
14
+ from torch.autograd import Variable
15
+
16
+
17
+ def as_numpy(v):
18
+ if isinstance(v, Variable):
19
+ v = v.data
20
+ return v.cpu().numpy()
21
+
22
+
23
+ class TorchTestCase(unittest.TestCase):
24
+ def assertTensorClose(self, a, b, atol=1e-3, rtol=1e-3):
25
+ npa, npb = as_numpy(a), as_numpy(b)
26
+ self.assertTrue(
27
+ np.allclose(npa, npb, atol=atol, rtol=rtol),
28
+ 'Tensor close check failed\n{}\n{}\nadiff={}, rdiff={}'.format(a, b, np.abs(npa - npb).max(), np.abs((npa - npb) / np.fmax(npa, 1e-5)).max())
29
+ )
models/ade20k/segm_lib/nn/parallel/__init__.py ADDED
@@ -0,0 +1 @@
1
+ from .data_parallel import UserScatteredDataParallel, user_scattered_collate, async_copy_to