Daniel Varga committed
Commit 1ce3798 · Parent: 40a7c0e

parameters, no-thumbs option, doc.

Files changed (3):
  1. app.py (+6 -3)
  2. create_embeddings.py (+26 -18)
  3. readme.sh (+17 -0)
app.py CHANGED
@@ -1,3 +1,4 @@
+import sys
 import gradio as gr
 import numpy as np
 import pickle
@@ -5,8 +6,11 @@ import clip
 import torch
 
 
+pickle_filename, base_url = sys.argv[1:]
+# base_url = "https://static.renyi.hu/ai-shared/daniel/sameenergy/"
 
-data = pickle.load(open("embeddings_nothumb.pkl", "rb"))
+
+data = pickle.load(open(pickle_filename, "rb"))
 embeddings = data["embeddings"]
 image_features = torch.Tensor(embeddings)
 image_features /= image_features.norm(dim=-1, keepdim=True)
@@ -16,7 +20,6 @@ n, d = embeddings.shape
 
 filenames = data["filenames"]
 
-base_url = "https://static.renyi.hu/ai-shared/daniel/sameenergy/index/"
 urls = [base_url + filename for filename in filenames]
 
 model, preprocess = clip.load('RN50')
@@ -83,4 +86,4 @@ with gr.Blocks(css="footer {visibility: hidden}") as demo:
 
 
 if __name__ == "__main__":
-    demo.launch()
+    demo.launch(height=3000)
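
For context on the new app.py interface: the pickle path and the image base URL are now positional command-line arguments, and image URLs are built by plain string concatenation. Below is a minimal sketch of that contract, assuming a pickle produced by create_embeddings.py; it mirrors the committed app.py but is not part of the commit.

import pickle
import sys

# Same unpacking as the committed app.py: argv[1] is the embeddings pickle,
# argv[2] is the public URL prefix for the images.
pickle_filename, base_url = sys.argv[1:]

with open(pickle_filename, "rb") as f:
    data = pickle.load(f)

# URLs are base_url + filename, so base_url should end with "/" and the
# filenames stored in the pickle should be relative to it.
urls = [base_url + filename for filename in data["filenames"]]
print(len(urls), urls[0])

The real app is launched the same way, e.g. python app.py sample_fbi_s1e1.pkl https://static.renyi.hu/ai-shared/daniel/sameenergy/ (see readme.sh below).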
create_embeddings.py CHANGED
@@ -1,4 +1,5 @@
 import os
+import sys
 import numpy as np
 import torch
 from PIL import Image
@@ -6,12 +7,6 @@ import clip
 import pickle
 
 
-model, preprocess = clip.load('RN50')
-# model, preprocess = clip.load('ViT-L/14@336px')
-
-limit = 1e9
-batch_size = 100
-
 def do_batch(batch, embeddings):
     image_batch = torch.tensor(np.stack(batch))
     with torch.no_grad():
@@ -20,26 +15,36 @@ def do_batch(batch, embeddings):
     print(f"{len(embeddings)} done")
 
 
-workdir = "./index"
-indx = os.listdir(workdir)
+model, preprocess = clip.load('RN50')
+
+limit = 1e9
+batch_size = 100
+
+
+output_filename = sys.argv[1]
+assert output_filename.endswith("pkl"), "first argument is the output pickle"
+assert sys.argv[2] in ("thumbs", "no-thumbs"), "second argument either thumbs or no-thumbs"
+do_thumbs = sys.argv[2] == "thumbs"
+
 embeddings = []
 filenames = []
 thumbs = []
 print("starting processing")
 batch = []
-for filename in indx:
-    if filename.lower().endswith("jpg"):
-        full_filename = os.path.join(workdir, filename)
-        rgb = Image.open(full_filename).convert("RGB")
+for filename in sys.stdin:
+    filename = filename.rstrip()
+    if filename.lower().endswith("jpg") or filename.lower().endswith("jpeg"):
+        rgb = Image.open(filename).convert("RGB")
         img = preprocess(rgb)
-        rgb.thumbnail((128, 128))
-        thumb = np.array(rgb)
         batch.append(img)
         if len(batch) >= batch_size:
            do_batch(batch, embeddings)
            batch = []
        filenames.append(filename)
-        thumbs.append(thumb)
+        if do_thumbs:
+            rgb.thumbnail((128, 128))
+            thumb = np.array(rgb)
+            thumbs.append(thumb)
        if len(filenames) >= limit:
            break
 
@@ -48,10 +53,13 @@ if len(batch) > 0:
     do_batch(batch, embeddings)
 
 embeddings = np.array(embeddings)
-assert len(embeddings) == len(filenames) == len(thumbs)
+assert len(embeddings) == len(filenames)
 print(f"processed {len(embeddings)} images")
 
-data = {"embeddings": embeddings, "filenames": filenames, "thumbs": thumbs}
+data = {"embeddings": embeddings, "filenames": filenames}
+if do_thumbs:
+    assert len(embeddings) == len(thumbs)
+    data["thumbs"] = thumbs
 
-with open("embeddings.pkl", "wb") as f:
+with open(output_filename, "wb") as f:
     pickle.dump(data, f)
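
Because create_embeddings.py now reads image paths from stdin instead of listing a hard-coded ./index directory, any path generator can drive it. Here is a minimal Python stand-in for the find command used in readme.sh below; the script name list_jpegs.py is hypothetical.

import os
import sys

# Walk a directory tree and print JPEG paths, one per line, so the output can
# be piped straight into create_embeddings.py.
root = sys.argv[1]  # e.g. sample_fbi_s1e1
for dirpath, _, names in os.walk(root):
    for name in names:
        if name.lower().endswith(("jpg", "jpeg")):
            print(os.path.join(dirpath, name))

Used like: python list_jpegs.py sample_fbi_s1e1 | python create_embeddings.py sample_fbi_s1e1.pkl no-thumbs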
readme.sh ADDED
@@ -0,0 +1,17 @@
+
+# lots of images moved to directory sample_fbi_s1e1
+
+# list them:
+find sample_fbi_s1e1 | grep "jpg\|JPG\|jpeg$" > sample_fbi_s1e1.txt
+
+# copy them to public:
+scp -q -r -P 2820 sample_fbi_s1e1 hexagon.renyi.hu:./ai-shared/daniel/sameenergy/
+# example URL:
+# https://static.renyi.hu/ai-shared/daniel/sameenergy/sample_fbi_s1e1/x_BRIDGE_ADRIATIC/Dobogoko_Esztergom/Videk_ut_Dobogoko_Esztergom_014.jpg
+
+# run CLIP:
+cat sample_fbi_s1e1.txt | python create_embeddings.py sample_fbi_s1e1.pkl no-thumbs
+# -> sample_fbi_s1e1.pkl contains embeddings and filenames.
+
+# gradio app:
+python app.py sample_fbi_s1e1.pkl https://static.renyi.hu/ai-shared/daniel/sameenergy/
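
A quick sanity check of the output pickle before pointing the gradio app at it; this is a sketch assuming the sample_fbi_s1e1.pkl produced above, not part of the commit.

import pickle

with open("sample_fbi_s1e1.pkl", "rb") as f:
    data = pickle.load(f)

embeddings = data["embeddings"]  # one CLIP RN50 embedding per image
filenames = data["filenames"]    # relative paths, later appended to base_url by app.py
assert len(embeddings) == len(filenames)
print(embeddings.shape)
print("thumbs" in data)  # True only when the script was run with the thumbs option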