Spaces:
Runtime error
Runtime error
Commit
·
f9f615c
1
Parent(s):
5ca9f5e
image test
Browse files- app.py +11 -10
- similarity_finder/__init__.py +0 -0
- similarity_finder/get_similar_images.py +100 -0
- similarity_finder/requirements.txt +3 -0
- similarity_finder/shell.nix +19 -0
app.py
CHANGED
@@ -3,23 +3,24 @@ import pandas as pd
|
|
3 |
from annoy import AnnoyIndex
|
4 |
from deepface import DeepFace
|
5 |
import gdown
|
|
|
6 |
|
7 |
-
def getSimilarImages():
|
8 |
-
return []
|
9 |
|
10 |
-
def my_inference_function(
|
11 |
-
|
|
|
|
|
12 |
|
13 |
-
gradio_interface = gradio.Interface(
|
14 |
fn = my_inference_function,
|
15 |
-
inputs = "
|
16 |
outputs = "text"
|
17 |
)
|
18 |
|
19 |
print("downloading dataset")
|
20 |
-
id = "1z6amrk21M-4N39OZLOsi4WWjAdBVSjh7"
|
21 |
-
gdown.download(id=id, output="final_dataframe.parquet.gzip", quiet=False)
|
22 |
|
23 |
print("loading dataframe")
|
24 |
-
df = pd.read_parquet("final_dataframe.parquet.gzip")
|
25 |
-
gradio_interface.launch()
|
|
|
3 |
from annoy import AnnoyIndex
|
4 |
from deepface import DeepFace
|
5 |
import gdown
|
6 |
+
from similarity_finder.get_similar_images import get_similar_images
|
7 |
|
|
|
|
|
8 |
|
9 |
+
def my_inference_function(image_path):
    """Placeholder inference endpoint for the Gradio interface.

    Echoes the uploaded image's file path back as a greeting while the real
    similarity pipeline is being wired up.

    Args:
        image_path: Path of the uploaded image (Image component, type="filepath").

    Returns:
        A greeting string containing the image path.
    """
    # TODO(review): replace with get_similar_images(df, embedding, age, gender,
    # race) once the dataframe download/load below is re-enabled.
    return "Hello " + image_path + "!"
|
13 |
|
14 |
+
# Build the Gradio UI.
# BUG FIX: the original used gr.Image, but this file refers to the module as
# `gradio` (gradio.Interface) and no `gr` alias is visible in the imports, so
# `gr` raises NameError at startup — consistent with the Space's "Runtime
# error" banner. Use the `gradio` name consistently.
gradio_interface = gradio.Interface(
    fn=my_inference_function,
    # type="filepath" hands the upload to the fn as a temp-file path string.
    inputs=gradio.Image(type="filepath"),
    outputs="text",
)
|
19 |
|
20 |
# Dataset download/load is temporarily disabled while the image-input flow is
# tested; my_inference_function does not read the dataframe yet.
print("downloading dataset")
#id = "1z6amrk21M-4N39OZLOsi4WWjAdBVSjh7"
#gdown.download(id=id, output="final_dataframe.parquet.gzip", quiet=False)

print("loading dataframe")
#df = pd.read_parquet("final_dataframe.parquet.gzip")

# share=True requests a public gradio tunnel link; presumably redundant when
# already hosted on a Space — TODO confirm.
gradio_interface.launch(share=True)
|
similarity_finder/__init__.py
ADDED
File without changes
|
similarity_finder/get_similar_images.py
ADDED
@@ -0,0 +1,100 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import argparse
import os
from multiprocessing import Process

import numpy as np
import pandas as pd

from annoy import AnnoyIndex
from deepface import DeepFace
|
6 |
+
|
7 |
+
# Google Drive artifacts required by the similarity search. Each entry maps a
# logical name to the Drive file id, the local file name, and the directory it
# is saved under (consumed by download_file / download_required_files below).
FILE_PATHS = {
    "dataframe": {
        "id": "1HFxHX2RkEr7_yVHnA-qk5Lj8CxOWrUda",
        "name": "final_embeddings_clusters.parquet.gzip",
        "path": "preprocessed_files"
    },
    "AnnoyIndex_Saved_File": {
        "id": "14uIgsVAiGolTy3-TGWrUUXqEzJqh3ZMl",
        "name": "CACD2000_refined_images_embeddings_clusters.ann",
        "path": "preprocessed_files"
    }
}
|
19 |
+
|
20 |
+
def download_file(file_id, file_name, save_path):
    """Download a Google Drive file via wget, handling the confirm token.

    Builds a shell one-liner that first fetches Drive's download-confirmation
    cookie, then streams the file to ``{save_path}/{file_name}``.

    Args:
        file_id: Google Drive file id.
        file_name: Local file name to save as.
        save_path: Existing directory to save into.
    """
    # SECURITY NOTE(review): arguments are interpolated into a shell command
    # unescaped — only call this with trusted, hard-coded values (as the
    # FILE_PATHS constant does). Requires `import os` at module level.
    command = r"""wget --load-cookies /tmp/cookies.txt "https://docs.google.com/uc?export=download&confirm=$(wget --quiet --save-cookies /tmp/cookies.txt --keep-session-cookies --no-check-certificate 'https://docs.google.com/uc?export=download&id={FILE_ID}' -O- | sed -rn 's/.*confirm=([0-9A-Za-z_]+).*/\1\n/p')&id={FILE_ID}" -O {SAVE_PATH}/{FILE_NAME} && rm -rf /tmp/cookies.txt""".format(
        FILE_ID=file_id, FILE_NAME=file_name, SAVE_PATH=save_path
    )
    os.system(command)
|
26 |
+
|
27 |
+
|
28 |
+
def download_required_files():
    """Download every artifact listed in FILE_PATHS, in parallel.

    Prepares the environment for running this module standalone (i.e. without
    a backend that has already fetched the files). One process is spawned per
    file; the call blocks until all downloads finish.

    Returns:
        A short status string once every download process has joined.
    """
    # BUG FIX: the original iterated over MODEL_PATHS, which is not defined
    # anywhere in this module — the constant is named FILE_PATHS.
    programs = []
    for key, details in FILE_PATHS.items():
        if not os.path.exists(details["path"]):
            os.makedirs(details["path"])
        proc = Process(target=download_file, args=(
            details["id"], details["name"], details["path"],))
        programs.append(proc)
        proc.start()

    for proc in programs:
        proc.join()

    return "Environment Ready!"  # typo fixed: was "Environent Ready!"
|
46 |
+
|
47 |
+
def get_similar_images_annoy(t, df, img_index, n=1000, max_dist=1.0):
    """Find images similar to df row ``img_index`` via a built Annoy index.

    Candidates must share the query row's race and gender, be within ±5 years
    of its age (exclusive bounds), and lie within ``max_dist`` in the index's
    metric.

    Args:
        t: Annoy index whose item ids align with df's row positions.
        df: DataFrame whose first two columns are the video id and face path,
            with 'race', 'gender' and 'age' columns used for filtering.
        img_index: Row position of the query image.
        n: Neighbours to request (n+1 fetched; the first hit is the query).
        max_dist: Maximum allowed distance for a candidate.

    Returns:
        Tuple ``(vid, face, similar_rows, distances)`` where ``distances`` is
        the raw, unfiltered distance list from Annoy.
    """
    vid, face = df.iloc[img_index, [0, 1]]
    similar_img_ids, dist = t.get_nns_by_item(img_index, n + 1, include_distances=True)
    # Keep candidates passing the demographic/distance filters; drop the first
    # surviving hit, which is always the query item itself.
    similar_img_ids = [
        s for s, d in zip(similar_img_ids, dist)
        if (d <= max_dist
            and df['race'][s] == df['race'][img_index]
            and df['gender'][s] == df['gender'][img_index]
            and int(df['age'][s]) < int(df['age'][img_index]) + 5
            and int(df['age'][s]) > int(df['age'][img_index]) - 5)
    ][1:]
    # BUG FIX: the original returned ``vid`` twice; callers unpack the second
    # element as the face path (see get_sample_n_similar).
    return vid, face, df.iloc[similar_img_ids], dist
|
53 |
+
|
54 |
+
def get_sample_n_similar(t, df, sample_idx):
    """Return jpg file names for up to 9 images similar to row ``sample_idx``.

    Args:
        t: Built Annoy index aligned with df's row positions.
        df: DataFrame with 'face' and 'cluster' columns (first two columns are
            vid and face — see get_similar_images_annoy).
        sample_idx: Row position of the query image.

    Returns:
        List of up to 9 jpg file names; the query's own image (first entry)
        is skipped.
    """
    output_images = []
    vid, face, similar, distances = get_similar_images_annoy(t, df, sample_idx)
    # NOTE(review): ``distances`` is the raw n+1-length list while the face
    # list below is filtered, so zip truncates pairs — confirm this pairing
    # is intended before relying on per-image distances.
    list_plot = [face] + similar['face'].values.tolist()
    list_cluster = [df.iloc[sample_idx]['cluster']] + similar['cluster'].values.tolist()
    for face, cluster, dist in zip(list_plot, list_cluster, distances):
        try:
            # face is expected to be a path-like string; derive "<stem>.jpg".
            output_images.append(f'{face.split("/")[-1][:-4]}.jpg')
        except Exception:  # was a bare except; don't swallow SystemExit etc.
            continue
    return output_images[1:10]
|
65 |
+
|
66 |
+
def add_to_dataframe(embedding, age, gender, race, dataframe):
    """Prepend a single user-supplied face record to ``dataframe``.

    The new row is tagged with the sentinel face name "user_image" so the
    similarity search can locate it at position 0.

    Args:
        embedding: Face embedding vector for the user image.
        age: Age attributed to the user image.
        gender: Gender attributed to the user image.
        race: Race attributed to the user image.
        dataframe: Existing face records; not modified in place.

    Returns:
        A new DataFrame with the user row first and a fresh RangeIndex.
    """
    record = {
        'face': "user_image",
        'embedding': embedding,
        'age': age,
        'gender': gender,
        'race': race,
    }
    user_row = pd.json_normalize(record)
    return pd.concat([user_row, dataframe], sort=False, ignore_index=True)
|
78 |
+
|
79 |
+
def get_similar_images(df, embedding, age, gender, race):
    """Build an Annoy index over df plus the user image, then query it.

    The user record is prepended at row 0, a euclidean Annoy index is built
    over every embedding, and the file names most similar to the user image
    are returned.

    Args:
        df: DataFrame of known faces with an 'embedding' column.
        embedding: Embedding vector of the user image.
        age: Age attributed to the user image.
        gender: Gender attributed to the user image.
        race: Race attributed to the user image.

    Returns:
        Tuple of (annoy_index, similar_image_file_names).
    """
    df = add_to_dataframe(embedding, age, gender, race, df)

    # Vector dimensionality comes from the first embedding (the user row).
    dims = len(df['embedding'][0])
    index = AnnoyIndex(dims, metric='euclidean')

    for row_id, vector in enumerate(df['embedding']):
        index.add_item(row_id, vector)
    index.build(50)  # 50 trees

    # Row 0 is the freshly-prepended user image.
    results = get_sample_n_similar(index, df, 0)
    return index, results
|
93 |
+
|
94 |
+
if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    # BUG FIX: the original called parser.parse_args() *before* add_argument,
    # parsing an empty spec and erroring out on any real CLI argument.
    parser.add_argument(
        "image_file_path",
        help="Enter the path of the image file that you need similar images for")
    args = parser.parse_args()
    image_path = str(args.image_file_path)
    # NOTE(review): get_similar_images takes (df, embedding, age, gender,
    # race), not an image path — this call still needs the dataframe loaded
    # and DeepFace run on image_path. Left as-is pending that wiring.
    get_similar_images(image_path)
|
similarity_finder/requirements.txt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
annoy
pandas
numpy
deepface
|
similarity_finder/shell.nix
ADDED
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Development shell for the similarity finder: Python 3.10 plus a local
# virtualenv (./env) populated from requirements.txt on first entry.
with import <nixpkgs> { };

let pythonPackages = python310Packages;
in pkgs.mkShell rec {
  name = "similarity-finder-env";
  venvDir = "./env";
  buildInputs = [
    # Compiler runtime libs so wheels with native extensions (e.g. annoy) load.
    stdenv.cc.cc.lib
    stdenv.cc
    pythonPackages.python
    # Creates/activates the venv in venvDir when the shell starts.
    pythonPackages.venvShellHook
  ];

  # Make libstdc++ and friends visible to Python extension modules.
  LD_LIBRARY_PATH = pkgs.lib.makeLibraryPath buildInputs;

  # Runs once, right after the venv is first created.
  postVenvCreation = ''
    pip install -r requirements.txt
  '';
}
|