Daniel Varga committed
Commit 1ce3798 · Parent: 40a7c0e

parameters, no-thumbs option, doc.

Files changed (3):
  1. app.py (+6 -3)
  2. create_embeddings.py (+26 -18)
  3. readme.sh (+17 -0)
app.py CHANGED
@@ -1,3 +1,4 @@
+import sys
 import gradio as gr
 import numpy as np
 import pickle
@@ -5,8 +6,11 @@ import clip
 import torch
 
 
+pickle_filename, base_url = sys.argv[1:]
+# base_url = "https://static.renyi.hu/ai-shared/daniel/sameenergy/"
 
-data = pickle.load(open("embeddings_nothumb.pkl", "rb"))
+
+data = pickle.load(open(pickle_filename, "rb"))
 embeddings = data["embeddings"]
 image_features = torch.Tensor(embeddings)
 image_features /= image_features.norm(dim=-1, keepdim=True)
@@ -16,7 +20,6 @@ n, d = embeddings.shape
 
 filenames = data["filenames"]
 
-base_url = "https://static.renyi.hu/ai-shared/daniel/sameenergy/index/"
 urls = [base_url + filename for filename in filenames]
 
 model, preprocess = clip.load('RN50')
@@ -83,4 +86,4 @@ with gr.Blocks(css="footer {visibility: hidden}") as demo:
 
 
 if __name__ == "__main__":
-    demo.launch()
+    demo.launch(height=3000)
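
For context on the new app.py interface: the pickle path and the image base URL are now positional command-line arguments, and image URLs are built by plain string concatenation. Below is a minimal sketch of that contract, assuming a pickle produced by create_embeddings.py; it mirrors the committed app.py but is not part of the commit.

import pickle
import sys

# Same unpacking as the committed app.py: argv[1] is the embeddings pickle,
# argv[2] is the public URL prefix for the images.
pickle_filename, base_url = sys.argv[1:]

with open(pickle_filename, "rb") as f:
    data = pickle.load(f)

# URLs are base_url + filename, so base_url should end with "/" and the
# filenames stored in the pickle should be relative to it.
urls = [base_url + filename for filename in data["filenames"]]
print(len(urls), urls[0])

The real app is launched the same way, e.g. python app.py sample_fbi_s1e1.pkl https://static.renyi.hu/ai-shared/daniel/sameenergy/ (see readme.sh below).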
create_embeddings.py CHANGED
@@ -1,4 +1,5 @@
 import os
+import sys
 import numpy as np
 import torch
 from PIL import Image
@@ -6,12 +7,6 @@ import clip
 import pickle
 
 
-model, preprocess = clip.load('RN50')
-# model, preprocess = clip.load('ViT-L/14@336px')
-
-limit = 1e9
-batch_size = 100
-
 def do_batch(batch, embeddings):
     image_batch = torch.tensor(np.stack(batch))
     with torch.no_grad():
@@ -20,26 +15,36 @@ def do_batch(batch, embeddings):
     print(f"{len(embeddings)} done")
 
 
-workdir = "./index"
-indx = os.listdir(workdir)
+model, preprocess = clip.load('RN50')
+
+limit = 1e9
+batch_size = 100
+
+
+output_filename = sys.argv[1]
+assert output_filename.endswith("pkl"), "first argument is the output pickle"
+assert sys.argv[2] in ("thumbs", "no-thumbs"), "second argument either thumbs or no-thumbs"
+do_thumbs = sys.argv[2] == "thumbs"
+
 embeddings = []
 filenames = []
 thumbs = []
 print("starting processing")
 batch = []
-for filename in indx:
-    if filename.lower().endswith("jpg"):
-        full_filename = os.path.join(workdir, filename)
-        rgb = Image.open(full_filename).convert("RGB")
+for filename in sys.stdin:
+    filename = filename.rstrip()
+    if filename.lower().endswith("jpg") or filename.lower().endswith("jpeg"):
+        rgb = Image.open(filename).convert("RGB")
         img = preprocess(rgb)
-        rgb.thumbnail((128, 128))
-        thumb = np.array(rgb)
         batch.append(img)
         if len(batch) >= batch_size:
            do_batch(batch, embeddings)
            batch = []
        filenames.append(filename)
-        thumbs.append(thumb)
+        if do_thumbs:
+            rgb.thumbnail((128, 128))
+            thumb = np.array(rgb)
+            thumbs.append(thumb)
        if len(filenames) >= limit:
            break
 
@@ -48,10 +53,13 @@ if len(batch) > 0:
     do_batch(batch, embeddings)
 
 embeddings = np.array(embeddings)
-assert len(embeddings) == len(filenames) == len(thumbs)
+assert len(embeddings) == len(filenames)
 print(f"processed {len(embeddings)} images")
 
-data = {"embeddings": embeddings, "filenames": filenames, "thumbs": thumbs}
+data = {"embeddings": embeddings, "filenames": filenames}
+if do_thumbs:
+    assert len(embeddings) == len(thumbs)
+    data["thumbs"] = thumbs
 
-with open("embeddings.pkl", "wb") as f:
+with open(output_filename, "wb") as f:
     pickle.dump(data, f)
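
Because create_embeddings.py now reads image paths from stdin instead of listing a hard-coded ./index directory, any path generator can drive it. Here is a minimal Python stand-in for the find command used in readme.sh below; the script name list_jpegs.py is hypothetical.

import os
import sys

# Walk a directory tree and print JPEG paths, one per line, so the output can
# be piped straight into create_embeddings.py.
root = sys.argv[1]  # e.g. sample_fbi_s1e1
for dirpath, _, names in os.walk(root):
    for name in names:
        if name.lower().endswith(("jpg", "jpeg")):
            print(os.path.join(dirpath, name))

Used like: python list_jpegs.py sample_fbi_s1e1 | python create_embeddings.py sample_fbi_s1e1.pkl no-thumbs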
readme.sh ADDED
@@ -0,0 +1,17 @@
+
+# lots of images moved to directory sample_fbi_s1e1
+
+# list them:
+find sample_fbi_s1e1 | grep "jpg\|JPG\|jpeg$" > sample_fbi_s1e1.txt
+
+# copy them to public:
+scp -q -r -P 2820 sample_fbi_s1e1 hexagon.renyi.hu:./ai-shared/daniel/sameenergy/
+# example URL:
+# https://static.renyi.hu/ai-shared/daniel/sameenergy/sample_fbi_s1e1/x_BRIDGE_ADRIATIC/Dobogoko_Esztergom/Videk_ut_Dobogoko_Esztergom_014.jpg
+
+# run CLIP:
+cat sample_fbi_s1e1.txt | python create_embeddings.py sample_fbi_s1e1.pkl no-thumbs
+# -> sample_fbi_s1e1.pkl contains embeddings and filenames.
+
+# gradio app:
+python app.py sample_fbi_s1e1.pkl https://static.renyi.hu/ai-shared/daniel/sameenergy/
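
A quick sanity check of the output pickle before pointing the gradio app at it; this is a sketch assuming the sample_fbi_s1e1.pkl produced above, not part of the commit.

import pickle

with open("sample_fbi_s1e1.pkl", "rb") as f:
    data = pickle.load(f)

embeddings = data["embeddings"]  # one CLIP RN50 embedding per image
filenames = data["filenames"]    # relative paths, later appended to base_url by app.py
assert len(embeddings) == len(filenames)
print(embeddings.shape)
print("thumbs" in data)  # True only when the script was run with the thumbs option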