{ "nbformat": 4, "nbformat_minor": 0, "metadata": { "colab": { "provenance": [], "collapsed_sections": [ "GAZViC5o2bya", "QwoVd4CE2njF", "8r0qzU2NRoIT", "lgaEjLAo7lMd", "RadVNaev2_mF" ] }, "kernelspec": { "name": "python3", "display_name": "Python 3" }, "language_info": { "name": "python" } }, "cells": [ { "cell_type": "markdown", "source": [ "# Dependencies" ], "metadata": { "id": "GAZViC5o2bya" } }, { "cell_type": "code", "execution_count": 1, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "wNCZ04U82IiL", "outputId": "dc277e76-67e7-4781-95a1-123bb139bbf3" }, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Collecting onnxruntime\n", " Downloading onnxruntime-1.17.1-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl (6.8 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.8/6.8 MB\u001b[0m \u001b[31m19.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hCollecting coloredlogs (from onnxruntime)\n", " Downloading coloredlogs-15.0.1-py2.py3-none-any.whl (46 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m46.0/46.0 kB\u001b[0m \u001b[31m4.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hRequirement already satisfied: flatbuffers in /usr/local/lib/python3.10/dist-packages (from onnxruntime) (23.5.26)\n", "Requirement already satisfied: numpy>=1.21.6 in /usr/local/lib/python3.10/dist-packages (from onnxruntime) (1.25.2)\n", "Requirement already satisfied: packaging in /usr/local/lib/python3.10/dist-packages (from onnxruntime) (23.2)\n", "Requirement already satisfied: protobuf in /usr/local/lib/python3.10/dist-packages (from onnxruntime) (3.20.3)\n", "Requirement already satisfied: sympy in /usr/local/lib/python3.10/dist-packages (from onnxruntime) (1.12)\n", "Collecting humanfriendly>=9.1 (from coloredlogs->onnxruntime)\n", " Downloading humanfriendly-10.0-py2.py3-none-any.whl (86 kB)\n", "\u001b[2K 
\u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m86.8/86.8 kB\u001b[0m \u001b[31m9.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hRequirement already satisfied: mpmath>=0.19 in /usr/local/lib/python3.10/dist-packages (from sympy->onnxruntime) (1.3.0)\n", "Installing collected packages: humanfriendly, coloredlogs, onnxruntime\n", "Successfully installed coloredlogs-15.0.1 humanfriendly-10.0 onnxruntime-1.17.1\n", "Collecting onnx\n", " Downloading onnx-1.15.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (15.7 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m15.7/15.7 MB\u001b[0m \u001b[31m40.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hRequirement already satisfied: numpy in /usr/local/lib/python3.10/dist-packages (from onnx) (1.25.2)\n", "Requirement already satisfied: protobuf>=3.20.2 in /usr/local/lib/python3.10/dist-packages (from onnx) (3.20.3)\n", "Installing collected packages: onnx\n", "Successfully installed onnx-1.15.0\n", "Collecting onnxruntime-extensions\n", " Downloading onnxruntime_extensions-0.10.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.0 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m7.0/7.0 MB\u001b[0m \u001b[31m21.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hInstalling collected packages: onnxruntime-extensions\n", "Successfully installed onnxruntime-extensions-0.10.1\n" ] } ], "source": [ "!pip install onnxruntime\n", "!pip install onnx\n", "!pip install onnxruntime-extensions" ] }, { "cell_type": "markdown", "source": [ "# Download ONNX Model\n", "This downloads [wd-convnext-tagger-v3](https://huggingface.co/SmilingWolf/wd-convnext-tagger-v3) created by [SmilingWolf](https://huggingface.co/SmilingWolf).\n", "\n", "Feel free to use one of SmilingWolf's other model variants instead.\n", "\n", "The tags file and the Power test image (power.jpg) are also downloaded for inference." 
], "metadata": { "id": "QwoVd4CE2njF" } }, { "cell_type": "code", "source": [ "!wget https://huggingface.co/SmilingWolf/wd-convnext-tagger-v3/resolve/main/model.onnx?download=true -O model.onnx" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "AMF_IIxm2tT_", "outputId": "b8e574b8-8276-4f74-92fa-b3a946e92655" }, "execution_count": 2, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "--2024-03-09 05:03:49-- https://huggingface.co/SmilingWolf/wd-convnext-tagger-v3/resolve/main/model.onnx?download=true\n", "Resolving huggingface.co (huggingface.co)... 3.163.189.90, 3.163.189.74, 3.163.189.37, ...\n", "Connecting to huggingface.co (huggingface.co)|3.163.189.90|:443... connected.\n", "HTTP request sent, awaiting response... 302 Found\n", "Location: https://cdn-lfs-us-1.huggingface.co/repos/d8/61/d8612304f05de662484c881a2ac180318d718b820314ffaaa700ef22c267e1a1/02f30d4de9bada756981a11464d13aa206f5e2d4ff6da384511beb812d58b2ca?response-content-disposition=attachment%3B+filename*%3DUTF-8%27%27model.onnx%3B+filename%3D%22model.onnx%22%3B&Expires=1710219829&Policy=eyJTdGF0ZW1lbnQiOlt7IkNvbmRpdGlvbiI6eyJEYXRlTGVzc1RoYW4iOnsiQVdTOkVwb2NoVGltZSI6MTcxMDIxOTgyOX19LCJSZXNvdXJjZSI6Imh0dHBzOi8vY2RuLWxmcy11cy0xLmh1Z2dpbmdmYWNlLmNvL3JlcG9zL2Q4LzYxL2Q4NjEyMzA0ZjA1ZGU2NjI0ODRjODgxYTJhYzE4MDMxOGQ3MThiODIwMzE0ZmZhYWE3MDBlZjIyYzI2N2UxYTEvMDJmMzBkNGRlOWJhZGE3NTY5ODFhMTE0NjRkMTNhYTIwNmY1ZTJkNGZmNmRhMzg0NTExYmViODEyZDU4YjJjYT9yZXNwb25zZS1jb250ZW50LWRpc3Bvc2l0aW9uPSoifV19&Signature=NUW35U0E0VvUCTynr4WArU1pgdg-F506HK5TiNnP7IrwbhEJfQpEcJo5CBoz1e4iUprWUCcEZJS0dRCmlGrr0PGYIjKXZ00BE4EiGZyi2vUqdP%7ExxUzWxps6XwEIVGiXc5R9yC%7EQgtd6oSJYQOH4ITBvEoNOJoQUPnjL5m1vk9T8-xHpeAxkHkHeOaF8FjlU5HKvUIc65SlUGirxOsHXl0v8o7sKmYlFs0Nmkoj9MurWKFL0sLFW5XIxkZveAGS9GB2sisitzkc4BUhICqDMSfv5CtlTEhXpgDUGbFo%7EohbeuKkQjIgnSU%7EVdFhDvY7Qew%7E5emodk-508AHvCx-UrA__&Key-Pair-Id=KCD77M1F0VK2B [following]\n", "--2024-03-09 05:03:49-- 
https://cdn-lfs-us-1.huggingface.co/repos/d8/61/d8612304f05de662484c881a2ac180318d718b820314ffaaa700ef22c267e1a1/02f30d4de9bada756981a11464d13aa206f5e2d4ff6da384511beb812d58b2ca?response-content-disposition=attachment%3B+filename*%3DUTF-8%27%27model.onnx%3B+filename%3D%22model.onnx%22%3B&Expires=1710219829&Policy=eyJTdGF0ZW1lbnQiOlt7IkNvbmRpdGlvbiI6eyJEYXRlTGVzc1RoYW4iOnsiQVdTOkVwb2NoVGltZSI6MTcxMDIxOTgyOX19LCJSZXNvdXJjZSI6Imh0dHBzOi8vY2RuLWxmcy11cy0xLmh1Z2dpbmdmYWNlLmNvL3JlcG9zL2Q4LzYxL2Q4NjEyMzA0ZjA1ZGU2NjI0ODRjODgxYTJhYzE4MDMxOGQ3MThiODIwMzE0ZmZhYWE3MDBlZjIyYzI2N2UxYTEvMDJmMzBkNGRlOWJhZGE3NTY5ODFhMTE0NjRkMTNhYTIwNmY1ZTJkNGZmNmRhMzg0NTExYmViODEyZDU4YjJjYT9yZXNwb25zZS1jb250ZW50LWRpc3Bvc2l0aW9uPSoifV19&Signature=NUW35U0E0VvUCTynr4WArU1pgdg-F506HK5TiNnP7IrwbhEJfQpEcJo5CBoz1e4iUprWUCcEZJS0dRCmlGrr0PGYIjKXZ00BE4EiGZyi2vUqdP%7ExxUzWxps6XwEIVGiXc5R9yC%7EQgtd6oSJYQOH4ITBvEoNOJoQUPnjL5m1vk9T8-xHpeAxkHkHeOaF8FjlU5HKvUIc65SlUGirxOsHXl0v8o7sKmYlFs0Nmkoj9MurWKFL0sLFW5XIxkZveAGS9GB2sisitzkc4BUhICqDMSfv5CtlTEhXpgDUGbFo%7EohbeuKkQjIgnSU%7EVdFhDvY7Qew%7E5emodk-508AHvCx-UrA__&Key-Pair-Id=KCD77M1F0VK2B\n", "Resolving cdn-lfs-us-1.huggingface.co (cdn-lfs-us-1.huggingface.co)... 3.163.189.20, 3.163.189.28, 3.163.189.91, ...\n", "Connecting to cdn-lfs-us-1.huggingface.co (cdn-lfs-us-1.huggingface.co)|3.163.189.20|:443... connected.\n", "HTTP request sent, awaiting response... 
200 OK\n", "Length: 394990732 (377M) [application/octet-stream]\n", "Saving to: ‘model.onnx’\n", "\n", "model.onnx 100%[===================>] 376.69M 31.6MB/s in 5.2s \n", "\n", "2024-03-09 05:03:54 (72.7 MB/s) - ‘model.onnx’ saved [394990732/394990732]\n", "\n" ] } ] }, { "cell_type": "markdown", "source": [ "## Download Tags / Test Image" ], "metadata": { "id": "8r0qzU2NRoIT" } }, { "cell_type": "code", "source": [ "!wget https://huggingface.co/SmilingWolf/wd-convnext-tagger-v3/resolve/main/selected_tags.csv?download=true -O tags.csv\n", "!wget https://huggingface.co/spaces/SmilingWolf/wd-tagger/resolve/main/power.jpg?download=true -O power.jpg" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "WPrRzNP-RqKs", "outputId": "a4a5af15-3bf2-4383-d4de-616e85485c20" }, "execution_count": 3, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "--2024-03-09 05:03:54-- https://huggingface.co/SmilingWolf/wd-convnext-tagger-v3/resolve/main/selected_tags.csv?download=true\n", "Resolving huggingface.co (huggingface.co)... 3.163.189.90, 3.163.189.74, 3.163.189.37, ...\n", "Connecting to huggingface.co (huggingface.co)|3.163.189.90|:443... connected.\n", "HTTP request sent, awaiting response... 200 OK\n", "Length: 308468 (301K) [text/plain]\n", "Saving to: ‘tags.csv’\n", "\n", "\rtags.csv 0%[ ] 0 --.-KB/s \rtags.csv 100%[===================>] 301.24K --.-KB/s in 0.03s \n", "\n", "2024-03-09 05:03:54 (11.1 MB/s) - ‘tags.csv’ saved [308468/308468]\n", "\n", "--2024-03-09 05:03:55-- https://huggingface.co/spaces/SmilingWolf/wd-tagger/resolve/main/power.jpg?download=true\n", "Resolving huggingface.co (huggingface.co)... 3.163.189.90, 3.163.189.74, 3.163.189.37, ...\n", "Connecting to huggingface.co (huggingface.co)|3.163.189.90|:443... connected.\n", "HTTP request sent, awaiting response... 
200 OK\n", "Length: 91159 (89K) [image/jpeg]\n", "Saving to: ‘power.jpg’\n", "\n", "power.jpg 100%[===================>] 89.02K --.-KB/s in 0.01s \n", "\n", "2024-03-09 05:03:55 (8.09 MB/s) - ‘power.jpg’ saved [91159/91159]\n", "\n" ] } ] },
{ "cell_type": "markdown", "source": [ "# ONNX QUANT\n", "To cut down on model size and have it work on mobile devices, quantization is needed (i think).\n", "\n", "First preprocess model for quantization - then quantize.\n", "\n", "The quant model name will be **model.quant.onnx**\n", "\n", "The convnext model went from ~377 MB down to 105 MB!" ], "metadata": { "id": "lgaEjLAo7lMd" } },
{ "cell_type": "code", "source": [ "!python -m onnxruntime.quantization.preprocess --input model.onnx --output model_pre_quant.onnx" ], "metadata": { "id": "sdk95gWw7Imp" }, "execution_count": 4, "outputs": [] },
{ "cell_type": "code", "source": [
"import onnx\n",
"from onnxruntime.quantization import quantize_dynamic, QuantType\n",
"\n",
"model_fp32 = 'model_pre_quant.onnx'\n",
"model_quant = 'model.quant.onnx'\n",
"\n",
"# Quantize the pre-processed model (model_fp32) into model_quant.\n",
"# Quantization is requested only for the op types listed below; an earlier\n",
"# variant of this call excluded individual Conv nodes via nodes_to_exclude instead.\n",
"# The quantized model is read back from model_quant later on, so the call's\n",
"# return value is not kept.\n",
"quantize_dynamic(model_fp32, model_quant, op_types_to_quantize=['MatMul', 'Transpose', 'Gemm', 'LayerNormalization'])\n",
"\n",
"# remove unneeded model\n",
"%rm model_pre_quant.onnx"
], "metadata": { "id": "E7M68khX7H93" }, "execution_count": 5, "outputs": [] },
{ "cell_type": "markdown", "source": [ "# Add Preprocessing / Postprocessing\n", "\n", "To make mobile inference easier, we will add preprocessing to the model.\n", "\n", "Instead of resizing, adding padding, converting image to float32 array, and converting to BGR before inferencing - we can add these steps to the model so that only a uint8 
tensor is needed.\n", "\n", "The model will be named **model.quant.preproc.onnx**\n", "\n", "**WARNING** \n", "It's very possible that I could be doing this wrong or that it could have some improvements. I'm not really sure what I'm doing but I found that these settings have given me the closest results to the base quant model." ], "metadata": { "id": "TmCdMPTwb6Mc" } },
{ "cell_type": "code", "source": [
"import onnx\n",
"from onnxruntime_extensions.tools.pre_post_processing import create_named_value, Normalize, Transpose, Debug, ReverseAxis, PixelsToYCbCr, PrePostProcessor, Unsqueeze, LetterBox, ConvertImageToBGR, Resize, CenterCrop, ImageBytesToFloat, ChannelsLastToChannelsFirst\n",
"\n",
"# Side length of the square image produced by the pre-processing steps.\n",
"img_size = 448\n",
"\n",
"# Per-channel (mean, std) pairs handed to the Normalize step.\n",
"image_mean = [0.5, 0.5, 0.5]\n",
"image_std = [0.5, 0.5, 0.5]\n",
"mean_std = [(mean, std) for mean, std in zip(image_mean, image_std)]\n",
"\n",
"# The new model input is a 1-D uint8 tensor named 'image'.\n",
"new_input = create_named_value('image', onnx.TensorProto.UINT8, [\"num_bytes\"])\n",
"pipeline = PrePostProcessor([new_input], onnx_opset=18)\n",
"pipeline.add_pre_processing(\n",
" [\n",
" ConvertImageToBGR(),\n",
" Resize((img_size, img_size), policy=\"not_larger\"),\n",
" # Adds padding. NOTE(review): the recorded test output contains a 'pillarboxed' tag,\n",
" # possibly caused by the padding colour - check whether another fill value fits better.\n",
" LetterBox(target_shape=(img_size, img_size)),\n",
" # 0.5 is the same value as the earlier (255/2) / 255; the default value gives bad results.\n",
" # NOTE(review): if this step multiplies each pixel by the factor and Normalize computes\n",
" # (x - mean) / std, both steps together yield pixel - 1, i.e. almost raw 0-255 values.\n",
" # Confirm against the onnxruntime-extensions documentation.\n",
" ImageBytesToFloat(0.5),\n",
" Normalize(mean_std, layout='HWC'), # copied values from the config on HF. 
seems to help results match closer to non-preprocessed model.\n", " Unsqueeze(axes=[0]), # add batch dim so shape is {1, 448, 448, channels}.\n", " ]\n", ")" ], "metadata": { "id": "HzJjPcSrb-DL" }, "execution_count": 7, "outputs": [] },
{ "cell_type": "code", "source": [ "# Save Model\n", "model = onnx.load('model.quant.onnx')\n", "new_model = pipeline.run(model)\n", "onnx.save_model(new_model, 'model.quant.preproc.onnx')" ], "metadata": { "id": "_zjoi_AWhIZN" }, "execution_count": 8, "outputs": [] },
{ "cell_type": "markdown", "source": [ "# Test Model\n", "Most of the inference code is directly from SmilingWolf's wd tagger space: https://huggingface.co/spaces/SmilingWolf/wd-tagger/blob/main/app.py" ], "metadata": { "id": "nXk6AZM0kfL4" } },
{ "cell_type": "code", "source": [
"import onnxruntime as _ort\n",
"from onnxruntime_extensions import get_library_path as _lib_path\n",
"from PIL import Image\n",
"import numpy as np\n",
"import pandas as pd\n",
"\n",
"# Step 1: setup session options (registers the onnxruntime-extensions custom-op library)\n",
"so = _ort.SessionOptions()\n",
"so.register_custom_ops_library(_lib_path())\n",
"\n",
"# Step 2: create session\n",
"# Relative paths: the earlier cells write their files to the current working directory.\n",
"sess = _ort.InferenceSession(\"model.quant.preproc.onnx\", so) # Don't forget to add session options (so)\n",
"\n",
"# Step 3: load image (no preprocessing needed!)\n",
"# The raw file bytes are passed to the model as a 1-D uint8 array.\n",
"image = np.fromfile(\"power.jpg\", dtype=np.uint8)\n",
"\n",
"# Step 4: run cell!\n",
"\n",
"\n",
"###### Inference Code ######\n",
"# Emoticon tags whose underscores must be kept as they are.\n",
"kaomojis = [\n",
"    \"0_0\",\n",
"    \"(o)_(o)\",\n",
"    \"+_+\",\n",
"    \"+_-\",\n",
"    \"._.\",\n",
"    \"<o>_<o>\",\n",
"    \"<|>_<|>\",\n",
"    \"=_=\",\n",
"    \">_<\",\n",
"    \"3_3\",\n",
"    \"6_9\",\n",
"    \">_o\",\n",
"    \"@_@\",\n",
"    \"^_^\",\n",
"    \"o_o\",\n",
"    \"u_u\",\n",
"    \"x_x\",\n",
"    \"|_|\",\n",
"    \"||_||\",\n",
"]\n",
"\n",
"\n",
"def load_labels(dataframe) -> tuple[list[str], list, list, list]:\n",
"    \"\"\"Split the tag table into display names and per-category row indexes.\n",
"\n",
"    Args:\n",
"        dataframe: tag table with a \"name\" and a \"category\" column.\n",
"\n",
"    Returns:\n",
"        A tuple (tag_names, rating_indexes, general_indexes, character_indexes).\n",
"        tag_names holds every name with underscores replaced by spaces, except\n",
"        for names listed in kaomojis. The three index lists hold the row\n",
"        positions whose category equals 9, 0 and 4 respectively.\n",
"    \"\"\"\n",
"    name_series = dataframe[\"name\"]\n",
"    name_series = name_series.map(\n",
"        lambda x: x.replace(\"_\", \" \") if x not in kaomojis else x\n",
"    )\n",
"    tag_names = name_series.tolist()\n",
"\n",
"    rating_indexes = list(np.where(dataframe[\"category\"] == 9)[0])\n",
"    general_indexes = list(np.where(dataframe[\"category\"] == 0)[0])\n",
"    character_indexes = list(np.where(dataframe[\"category\"] == 4)[0])\n",
"    return tag_names, rating_indexes, general_indexes, character_indexes\n",
"\n",
"csv_path = \"tags.csv\"\n",
"\n",
"tags_df = pd.read_csv(csv_path)\n",
"tag_names, rating_indexes, general_indexes, character_indexes = load_labels(tags_df)\n",
"\n",
"input_name = sess.get_inputs()[0].name\n",
"label_name = sess.get_outputs()[0].name\n",
"\n",
"preds = sess.run([label_name], {input_name: image})[0]\n",
"\n",
"\n",
"# Pair every tag name with its score from the first (only) batch entry.\n",
"labels = list(zip(tag_names, preds[0].astype(float)))\n",
"ratings_names = [labels[i] for i in rating_indexes]\n",
"rating = dict(ratings_names)\n",
"\n",
"character_names = [labels[i] for i in character_indexes]\n",
"\n",
"# Character tags are kept only above a score of 0.85.\n",
"character_res = [x for x in character_names if x[1] > 0.85]\n",
"character_res = dict(character_res)\n",
"\n",
"# General tags are kept only above a score of 0.35.\n",
"general_names = [labels[i] for i in general_indexes]\n",
"general_res = [x for x in general_names if x[1] > 0.35]\n",
"general_res = dict(general_res)\n",
"\n",
"# General tag names ordered by descending score, joined into one string.\n",
"sorted_general_strings = sorted(\n",
"    general_res.items(),\n",
"    key=lambda x: x[1],\n",
"    reverse=True,\n",
")\n",
"sorted_general_strings = [x[0] for x in sorted_general_strings]\n",
"# Parentheses are backslash-escaped; the backslash itself is written as \\\\ so the\n",
"# literals are valid escape sequences and still produce the same text.\n",
"sorted_general_strings = (\n",
"    \", \".join(sorted_general_strings).replace(\"(\", \"\\\\(\").replace(\")\", \"\\\\)\")\n",
")\n",
"\n",
"print(rating)\n",
"print(character_res)\n",
"print(general_res)\n",
"print(sorted_general_strings)"
], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "m5B0Wj4NkhMt", "outputId": "6b551906-9b9e-4db9-f2bc-5ae5427381a3" }, "execution_count": 9, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "{'general': 0.9169240593910217, 'sensitive': 0.0812525749206543, 
'questionable': 0.0006865859031677246, 'explicit': 0.0002942383289337158}\n", "{'power (chainsaw man)': 0.9924684762954712}\n", "{'1girl': 0.9980734586715698, 'solo': 0.967477560043335, 'long hair': 0.8743129968643188, 'looking at viewer': 0.8921941518783569, 'smile': 0.7079806327819824, 'open mouth': 0.8572969436645508, 'simple background': 0.6686466336250305, 'shirt': 0.9388805627822876, 'blonde hair': 0.647895336151123, 'white background': 0.5928694009780884, 'red eyes': 0.4210684299468994, 'hair between eyes': 0.8992906212806702, 'jacket': 0.5598545074462891, 'white shirt': 0.8964416980743408, 'upper body': 0.666782557964325, 'horns': 0.9738106727600098, 'teeth': 0.9321538209915161, 'necktie': 0.9494357109069824, 'collared shirt': 0.8381757736206055, 'orange eyes': 0.4594384431838989, 'symbol-shaped pupils': 0.8655499219894409, 'fangs': 0.3685188889503479, 'demon horns': 0.5966249704360962, 'sharp teeth': 0.8942122459411621, 'black necktie': 0.8483953475952148, 'claw pose': 0.5946617722511292, 'red horns': 0.9497503042221069, 'cross-shaped pupils': 0.9292328357696533, 'pillarboxed': 0.766990065574646}\n", "1girl, horns, solo, red horns, necktie, shirt, teeth, cross-shaped pupils, hair between eyes, white shirt, sharp teeth, looking at viewer, long hair, symbol-shaped pupils, open mouth, black necktie, collared shirt, pillarboxed, smile, simple background, upper body, blonde hair, demon horns, claw pose, white background, jacket, orange eyes, red eyes, fangs\n" ] } ] } ] }