Spaces: SmilingWolf/danbooru2022_embeddings_playground

Running

App Files Community

SmilingWolf committed on May 27, 2024

Commit

0bd8f65

1 Parent(s): 8444c60

Add image support

Browse files

Files changed (6) hide show

.gitattributes +5 -0
app.py +98 -22
examples/46657164_p1.jpg +0 -0
examples/60378883_p0.jpg +0 -0
examples/DaRlExxUwAAcUOS-orig.jpg +0 -0
requirements.txt +2 -0

.gitattributes CHANGED Viewed

@@ -34,3 +34,8 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
 *.index filter=lfs diff=lfs merge=lfs -text

 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
 *.index filter=lfs diff=lfs merge=lfs -text
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class

app.py CHANGED Viewed

@@ -7,10 +7,20 @@ import jax
 import numpy as np
 import pandas as pd
 import requests
 from Models.CLIP import CLIP
 def danbooru_id_to_url(image_id, selected_ratings, api_username="", api_key=""):
     headers = {"User-Agent": "image_similarity_tool"}
     ratings_to_letters = {
@@ -56,6 +66,8 @@ class Predictor:
     def predict(
         self,
         positive_tags,
         negative_tags,
         selected_ratings,
@@ -68,38 +80,68 @@ class Predictor:
         num_classes = len(tags_df)
         positive_tags = positive_tags.split(",")
         negative_tags = negative_tags.split(",")
         positive_tags_idxs = tags_df.index[tags_df["name"].isin(positive_tags)].tolist()
         negative_tags_idxs = tags_df.index[tags_df["name"].isin(negative_tags)].tolist()
-        tags = np.zeros((1, num_classes), dtype=np.float32)
-        tags[0][positive_tags_idxs] = 1
-        emb_from_logits = model.apply(
-            {"params": self.params},
-            tags,
-            method=model.encode_text,
-        )
-        emb_from_logits = jax.device_get(emb_from_logits)
-        faiss.normalize_L2(emb_from_logits)
         if len(negative_tags_idxs) > 0:
             tags = np.zeros((1, num_classes), dtype=np.float32)
             tags[0][negative_tags_idxs] = 1
-            neg_emb_from_logits = model.apply(
                 {"params": self.params},
                 tags,
                 method=model.encode_text,
             )
-            neg_emb_from_logits = jax.device_get(neg_emb_from_logits)
-            faiss.normalize_L2(neg_emb_from_logits)
-            emb_from_logits = emb_from_logits - neg_emb_from_logits
-            faiss.normalize_L2(emb_from_logits)
-        dists, indexes = self.knn_index.search(emb_from_logits, k=n_neighbours)
         neighbours_ids = self.images_ids[indexes][0]
         neighbours_ids = [int(x) for x in neighbours_ids]
@@ -122,10 +164,19 @@ def main():
     predictor = Predictor()
     with gr.Blocks() as demo:
         with gr.Row():
             with gr.Column():
                 positive_tags = gr.Textbox(label="Positive tags")
                 negative_tags = gr.Textbox(label="Negative tags")
                 n_neighbours = gr.Slider(
                     minimum=1,
                     maximum=20,
@@ -133,15 +184,10 @@ def main():
                     step=1,
                     label="# of images",
                 )
             with gr.Column():
                 api_username = gr.Textbox(label="Danbooru API Username")
                 api_key = gr.Textbox(label="Danbooru API Key")
-                selected_ratings = gr.CheckboxGroup(
-                    choices=["General", "Sensitive", "Questionable", "Explicit"],
-                    value=["General", "Sensitive"],
-                    label="Ratings",
-                )
         find_btn = gr.Button("Find similar images")
         similar_images = gr.Gallery(label="Similar images", columns=[5])
@@ -149,6 +195,8 @@ def main():
         examples = gr.Examples(
             [
                 [
                     "marcille_donato",
                     "",
                     ["General", "Sensitive"],
@@ -157,6 +205,8 @@ def main():
                     "",
                 ],
                 [
                     "yellow_eyes,red_horns",
                     "",
                     ["General", "Sensitive"],
@@ -165,6 +215,8 @@ def main():
                     "",
                 ],
                 [
                     "artoria_pendragon_(fate),solo",
                     "excalibur_(fate/stay_night),green_eyes,monochrome,blonde_hair",
                     ["General", "Sensitive"],
@@ -172,8 +224,30 @@ def main():
                     "",
                     "",
                 ],
             ],
             inputs=[
                 positive_tags,
                 negative_tags,
                 selected_ratings,
@@ -190,6 +264,8 @@ def main():
         find_btn.click(
             fn=predictor.predict,
             inputs=[
                 positive_tags,
                 negative_tags,
                 selected_ratings,

 import numpy as np
 import pandas as pd
 import requests
+from imgutils.tagging import wd14
 from Models.CLIP import CLIP
+def combine_embeddings(pos_img_embs, pos_tags_embs, neg_img_embs, neg_tags_embs):  # Merge the four query embeddings into one vector.
+    pos = pos_img_embs + pos_tags_embs  # positive side: image embedding plus tag embedding
+    neg = neg_img_embs + neg_tags_embs  # negative side: image embedding plus tag embedding
+    result = pos - neg  # subtract the negative side; no normalization is applied in this function
+    return result
 def danbooru_id_to_url(image_id, selected_ratings, api_username="", api_key=""):
     headers = {"User-Agent": "image_similarity_tool"}
     ratings_to_letters = {
     def predict(
         self,
+        pos_img_input,
+        neg_img_input,
         positive_tags,
         negative_tags,
         selected_ratings,
         num_classes = len(tags_df)
+        output_shape = model.out_units
+        pos_img_embs = np.zeros((1, output_shape), dtype=np.float32)
+        neg_img_embs = np.zeros((1, output_shape), dtype=np.float32)
+        pos_tags_embs = np.zeros((1, output_shape), dtype=np.float32)
+        neg_tags_embs = np.zeros((1, output_shape), dtype=np.float32)
         positive_tags = positive_tags.split(",")
         negative_tags = negative_tags.split(",")
         positive_tags_idxs = tags_df.index[tags_df["name"].isin(positive_tags)].tolist()
         negative_tags_idxs = tags_df.index[tags_df["name"].isin(negative_tags)].tolist()
+        if pos_img_input is not None:
+            pos_img_embs = wd14.get_wd14_tags(
+                pos_img_input,
+                model_name="ConvNext",
+                fmt=("embedding"),
+            )
+            pos_img_embs = np.expand_dims(pos_img_embs, 0)
+            faiss.normalize_L2(pos_img_embs)
+        if neg_img_input is not None:
+            neg_img_embs = wd14.get_wd14_tags(
+                neg_img_input,
+                model_name="ConvNext",
+                fmt=("embedding"),
+            )
+            neg_img_embs = np.expand_dims(neg_img_embs, 0)
+            faiss.normalize_L2(neg_img_embs)
+        if len(positive_tags_idxs) > 0:
+            tags = np.zeros((1, num_classes), dtype=np.float32)
+            tags[0][positive_tags_idxs] = 1
+            pos_tags_embs = model.apply(
+                {"params": self.params},
+                tags,
+                method=model.encode_text,
+            )
+            pos_tags_embs = jax.device_get(pos_tags_embs)
+            faiss.normalize_L2(pos_tags_embs)
         if len(negative_tags_idxs) > 0:
             tags = np.zeros((1, num_classes), dtype=np.float32)
             tags[0][negative_tags_idxs] = 1
+            neg_tags_embs = model.apply(
                 {"params": self.params},
                 tags,
                 method=model.encode_text,
             )
+            neg_tags_embs = jax.device_get(neg_tags_embs)
+            faiss.normalize_L2(neg_tags_embs)
+        embeddings = combine_embeddings(
+            pos_img_embs,
+            pos_tags_embs,
+            neg_img_embs,
+            neg_tags_embs,
+        )
+        dists, indexes = self.knn_index.search(embeddings, k=n_neighbours)
         neighbours_ids = self.images_ids[indexes][0]
         neighbours_ids = [int(x) for x in neighbours_ids]
     predictor = Predictor()
     with gr.Blocks() as demo:
+        with gr.Row():
+            pos_img_input = gr.Image(type="pil", label="Positive input")
+            neg_img_input = gr.Image(type="pil", label="Negative input")
         with gr.Row():
             with gr.Column():
                 positive_tags = gr.Textbox(label="Positive tags")
                 negative_tags = gr.Textbox(label="Negative tags")
+            with gr.Column():
+                selected_ratings = gr.CheckboxGroup(
+                    choices=["General", "Sensitive", "Questionable", "Explicit"],
+                    value=["General", "Sensitive"],
+                    label="Ratings",
+                )
                 n_neighbours = gr.Slider(
                     minimum=1,
                     maximum=20,
                     step=1,
                     label="# of images",
                 )
             with gr.Column():
                 api_username = gr.Textbox(label="Danbooru API Username")
                 api_key = gr.Textbox(label="Danbooru API Key")
         find_btn = gr.Button("Find similar images")
         similar_images = gr.Gallery(label="Similar images", columns=[5])
         examples = gr.Examples(
             [
                 [
+                    None,
+                    None,
                     "marcille_donato",
                     "",
                     ["General", "Sensitive"],
                     "",
                 ],
                 [
+                    None,
+                    None,
                     "yellow_eyes,red_horns",
                     "",
                     ["General", "Sensitive"],
                     "",
                 ],
                 [
+                    None,
+                    None,
                     "artoria_pendragon_(fate),solo",
                     "excalibur_(fate/stay_night),green_eyes,monochrome,blonde_hair",
                     ["General", "Sensitive"],
                     "",
                     "",
                 ],
+                [
+                    "examples/60378883_p0.jpg",
+                    None,
+                    "fujimaru_ritsuka_(female)",
+                    "solo",
+                    ["General", "Sensitive"],
+                    5,
+                    "",
+                    "",
+                ],
+                [
+                    "examples/DaRlExxUwAAcUOS-orig.jpg",
+                    "examples/46657164_p1.jpg",
+                    "",
+                    "",
+                    ["General", "Sensitive"],
+                    5,
+                    "",
+                    "",
+                ],
             ],
             inputs=[
+                pos_img_input,
+                neg_img_input,
                 positive_tags,
                 negative_tags,
                 selected_ratings,
         find_btn.click(
             fn=predictor.predict,
             inputs=[
+                pos_img_input,
+                neg_img_input,
                 positive_tags,
                 negative_tags,
                 selected_ratings,

examples/46657164_p1.jpg ADDED Viewed

examples/60378883_p0.jpg ADDED Viewed

examples/DaRlExxUwAAcUOS-orig.jpg ADDED Viewed

requirements.txt CHANGED Viewed

@@ -1,3 +1,5 @@
 faiss-cpu
 jax[cpu]
 flax

 faiss-cpu
 jax[cpu]
 flax
+imgutils
+onnxruntime