Spaces:
Sleeping
Sleeping
Daniel Varga
committed on
Commit
·
1ce3798
1
Parent(s):
40a7c0e
parameters, no-thumbs option, doc.
Browse files
- app.py +6 -3
- create_embeddings.py +26 -18
- readme.sh +17 -0
app.py
CHANGED
@@ -1,3 +1,4 @@
|
|
|
|
1 |
import gradio as gr
|
2 |
import numpy as np
|
3 |
import pickle
|
@@ -5,8 +6,11 @@ import clip
|
|
5 |
import torch
|
6 |
|
7 |
|
|
|
|
|
8 |
|
9 |
-
|
|
|
10 |
embeddings = data["embeddings"]
|
11 |
image_features = torch.Tensor(embeddings)
|
12 |
image_features /= image_features.norm(dim=-1, keepdim=True)
|
@@ -16,7 +20,6 @@ n, d = embeddings.shape
|
|
16 |
|
17 |
filenames = data["filenames"]
|
18 |
|
19 |
-
base_url = "https://static.renyi.hu/ai-shared/daniel/sameenergy/index/"
|
20 |
urls = [base_url + filename for filename in filenames]
|
21 |
|
22 |
model, preprocess = clip.load('RN50')
|
@@ -83,4 +86,4 @@ with gr.Blocks(css="footer {visibility: hidden}") as demo:
|
|
83 |
|
84 |
|
85 |
if __name__ == "__main__":
|
86 |
-
demo.launch()
|
|
|
1 |
+
import sys
|
2 |
import gradio as gr
|
3 |
import numpy as np
|
4 |
import pickle
|
|
|
6 |
import torch
|
7 |
|
8 |
|
9 |
+
pickle_filename, base_url = sys.argv[1:]
|
10 |
+
# base_url = "https://static.renyi.hu/ai-shared/daniel/sameenergy/"
|
11 |
|
12 |
+
|
13 |
+
data = pickle.load(open(pickle_filename, "rb"))
|
14 |
embeddings = data["embeddings"]
|
15 |
image_features = torch.Tensor(embeddings)
|
16 |
image_features /= image_features.norm(dim=-1, keepdim=True)
|
|
|
20 |
|
21 |
filenames = data["filenames"]
|
22 |
|
|
|
23 |
urls = [base_url + filename for filename in filenames]
|
24 |
|
25 |
model, preprocess = clip.load('RN50')
|
|
|
86 |
|
87 |
|
88 |
if __name__ == "__main__":
|
89 |
+
demo.launch(height=3000)
|
create_embeddings.py
CHANGED
@@ -1,4 +1,5 @@
|
|
1 |
import os
|
|
|
2 |
import numpy as np
|
3 |
import torch
|
4 |
from PIL import Image
|
@@ -6,12 +7,6 @@ import clip
|
|
6 |
import pickle
|
7 |
|
8 |
|
9 |
-
model, preprocess = clip.load('RN50')
|
10 |
-
# model, preprocess = clip.load('ViT-L/14@336px')
|
11 |
-
|
12 |
-
limit = 1e9
|
13 |
-
batch_size = 100
|
14 |
-
|
15 |
def do_batch(batch, embeddings):
|
16 |
image_batch = torch.tensor(np.stack(batch))
|
17 |
with torch.no_grad():
|
@@ -20,26 +15,36 @@ def do_batch(batch, embeddings):
|
|
20 |
print(f"{len(embeddings)} done")
|
21 |
|
22 |
|
23 |
-
|
24 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
25 |
embeddings = []
|
26 |
filenames = []
|
27 |
thumbs = []
|
28 |
print("starting processing")
|
29 |
batch = []
|
30 |
-
for filename in
|
31 |
-
|
32 |
-
|
33 |
-
rgb = Image.open(
|
34 |
img = preprocess(rgb)
|
35 |
-
rgb.thumbnail((128, 128))
|
36 |
-
thumb = np.array(rgb)
|
37 |
batch.append(img)
|
38 |
if len(batch) >= batch_size:
|
39 |
do_batch(batch, embeddings)
|
40 |
batch = []
|
41 |
filenames.append(filename)
|
42 |
-
|
|
|
|
|
|
|
43 |
if len(filenames) >= limit:
|
44 |
break
|
45 |
|
@@ -48,10 +53,13 @@ if len(batch) > 0:
|
|
48 |
do_batch(batch, embeddings)
|
49 |
|
50 |
embeddings = np.array(embeddings)
|
51 |
-
assert len(embeddings) == len(filenames)
|
52 |
print(f"processed {len(embeddings)} images")
|
53 |
|
54 |
-
data = {"embeddings": embeddings, "filenames": filenames
|
|
|
|
|
|
|
55 |
|
56 |
-
with open(
|
57 |
pickle.dump(data, f)
|
|
|
1 |
import os
|
2 |
+
import sys
|
3 |
import numpy as np
|
4 |
import torch
|
5 |
from PIL import Image
|
|
|
7 |
import pickle
|
8 |
|
9 |
|
|
|
|
|
|
|
|
|
|
|
|
|
10 |
def do_batch(batch, embeddings):
|
11 |
image_batch = torch.tensor(np.stack(batch))
|
12 |
with torch.no_grad():
|
|
|
15 |
print(f"{len(embeddings)} done")
|
16 |
|
17 |
|
18 |
+
model, preprocess = clip.load('RN50')
|
19 |
+
|
20 |
+
limit = 1e9
|
21 |
+
batch_size = 100
|
22 |
+
|
23 |
+
|
24 |
+
output_filename = sys.argv[1]
|
25 |
+
assert output_filename.endswith("pkl"), "first argument is the output pickle"
|
26 |
+
assert sys.argv[2] in ("thumbs", "no-thumbs"), "second argument either thumbs or no-thumbs"
|
27 |
+
do_thumbs = sys.argv[2] == "thumbs"
|
28 |
+
|
29 |
embeddings = []
|
30 |
filenames = []
|
31 |
thumbs = []
|
32 |
print("starting processing")
|
33 |
batch = []
|
34 |
+
for filename in sys.stdin:
|
35 |
+
filename = filename.rstrip()
|
36 |
+
if filename.lower().endswith("jpg") or filename.lower().endswith("jpeg"):
|
37 |
+
rgb = Image.open(filename).convert("RGB")
|
38 |
img = preprocess(rgb)
|
|
|
|
|
39 |
batch.append(img)
|
40 |
if len(batch) >= batch_size:
|
41 |
do_batch(batch, embeddings)
|
42 |
batch = []
|
43 |
filenames.append(filename)
|
44 |
+
if do_thumbs:
|
45 |
+
rgb.thumbnail((128, 128))
|
46 |
+
thumb = np.array(rgb)
|
47 |
+
thumbs.append(thumb)
|
48 |
if len(filenames) >= limit:
|
49 |
break
|
50 |
|
|
|
53 |
do_batch(batch, embeddings)
|
54 |
|
55 |
embeddings = np.array(embeddings)
|
56 |
+
assert len(embeddings) == len(filenames)
|
57 |
print(f"processed {len(embeddings)} images")
|
58 |
|
59 |
+
data = {"embeddings": embeddings, "filenames": filenames}
|
60 |
+
if do_thumbs:
|
61 |
+
assert len(embeddings) == len(thumbs)
|
62 |
+
data["thumbs"] = thumbs
|
63 |
|
64 |
+
with open(output_filename, "wb") as f:
|
65 |
pickle.dump(data, f)
|
readme.sh
ADDED
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
# Assumes a large number of images have already been moved into the directory sample_fbi_s1e1.
|
3 |
+
|
4 |
+
# list them:
|
5 |
+
find sample_fbi_s1e1 | grep "jpg\|JPG\|jpeg$" > sample_fbi_s1e1.txt
|
6 |
+
|
7 |
+
# copy them to public:
|
8 |
+
scp -q -r -P 2820 sample_fbi_s1e1 hexagon.renyi.hu:./ai-shared/daniel/sameenergy/
|
9 |
+
# example URL:
|
10 |
+
# https://static.renyi.hu/ai-shared/daniel/sameenergy/sample_fbi_s1e1/x_BRIDGE_ADRIATIC/Dobogoko_Esztergom/Videk_ut_Dobogoko_Esztergom_014.jpg
|
11 |
+
|
12 |
+
# run CLIP:
|
13 |
+
cat sample_fbi_s1e1.txt | python create_embeddings.py sample_fbi_s1e1.pkl no-thumbs
|
14 |
+
# -> sample_fbi_s1e1.pkl contains embeddings and filenames.
|
15 |
+
|
16 |
+
# gradio app:
|
17 |
+
python app.py sample_fbi_s1e1.pkl https://static.renyi.hu/ai-shared/daniel/sameenergy/
|