99ashutosh commited on
Commit
f9f615c
·
1 Parent(s): 5ca9f5e

image test

Browse files
app.py CHANGED
@@ -3,23 +3,24 @@ import pandas as pd
3
  from annoy import AnnoyIndex
4
  from deepface import DeepFace
5
  import gdown
 
6
 
7
- def getSimilarImages():
8
- return []
9
 
10
- def my_inference_function(name):
11
- return "Hello " + name + "!"
 
 
12
 
13
- gradio_interface = gradio.Interface(
14
  fn = my_inference_function,
15
- inputs = "text",
16
  outputs = "text"
17
  )
18
 
19
  print("downloading dataset")
20
- id = "1z6amrk21M-4N39OZLOsi4WWjAdBVSjh7"
21
- gdown.download(id=id, output="final_dataframe.parquet.gzip", quiet=False)
22
 
23
  print("loading dataframe")
24
- df = pd.read_parquet("final_dataframe.parquet.gzip")
25
- gradio_interface.launch()
 
3
  from annoy import AnnoyIndex
4
  from deepface import DeepFace
5
  import gdown
6
+ from similarity_finder.get_similar_images import get_similar_images
7
 
 
 
8
 
9
def my_inference_function(image_path):
    """Placeholder inference handler: echo the uploaded image's file path.

    Parameters
    ----------
    image_path : str
        Filesystem path supplied by the Gradio Image component.

    Returns
    -------
    str
        A greeting containing the path (stand-in output for now).

    TODO(review): replace the echo with the real pipeline —
    get_similar_images(df, embedding, age, gender, race) — once the
    dataframe download below is re-enabled.
    """
    return "Hello " + image_path + "!"
13
 
14
# BUG FIX: the original passed `inputs = gr.Image(...)` while constructing
# `gradio.Interface(...)` — only one of the two aliases can be in scope.
# The pre-existing code used `gradio.`, so presumably the module is imported
# as `import gradio`; use that alias consistently (verify against the
# import at the top of the file).
gradio_interface = gradio.Interface(
    fn=my_inference_function,
    inputs=gradio.Image(type="filepath"),
    outputs="text",
)
19
 
20
print("downloading dataset")
# Dataset download is disabled for this image test; re-enable both lines to
# restore the real data:
# id = "1z6amrk21M-4N39OZLOsi4WWjAdBVSjh7"
# gdown.download(id=id, output="final_dataframe.parquet.gzip", quiet=False)

print("loading dataframe")
# df = pd.read_parquet("final_dataframe.parquet.gzip")

# share=True publishes a temporary public URL in addition to the local one.
gradio_interface.launch(share=True)
similarity_finder/__init__.py ADDED
File without changes
similarity_finder/get_similar_images.py ADDED
@@ -0,0 +1,100 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import argparse
import os
from multiprocessing import Process

import numpy as np
import pandas as pd
from annoy import AnnoyIndex
from deepface import DeepFace
6
+
7
# Registry of Google Drive artifacts this module needs:
#   id   -> Drive file id,  name -> local file name,  path -> target directory.
FILE_PATHS = {
    "dataframe": {
        "id": "1HFxHX2RkEr7_yVHnA-qk5Lj8CxOWrUda",
        "name": "final_embeddings_clusters.parquet.gzip",
        "path": "preprocessed_files",
    },
    "AnnoyIndex_Saved_File": {
        "id": "14uIgsVAiGolTy3-TGWrUUXqEzJqh3ZMl",
        "name": "CACD2000_refined_images_embeddings_clusters.ann",
        "path": "preprocessed_files",
    },
}
19
+
20
def download_file(file_id, file_name, save_path):
    """Download one Google Drive file to save_path/file_name via wget.

    Uses the cookie/confirm-token trick so Drive's "can't scan for viruses"
    interstitial is bypassed for large files.

    NOTE(review): the arguments are interpolated straight into a shell
    command run with os.system — this is shell injection if any of them is
    untrusted. Here they come from the trusted FILE_PATHS registry, but do
    not call this with user-supplied values.
    """
    command = r"""wget --load-cookies /tmp/cookies.txt "https://docs.google.com/uc?export=download&confirm=$(wget --quiet --save-cookies /tmp/cookies.txt --keep-session-cookies --no-check-certificate 'https://docs.google.com/uc?export=download&id={FILE_ID}' -O- | sed -rn 's/.*confirm=([0-9A-Za-z_]+).*/\1\n/p')&id={FILE_ID}" -O {SAVE_PATH}/{FILE_NAME} && rm -rf /tmp/cookies.txt""".format(
        FILE_ID=file_id, FILE_NAME=file_name, SAVE_PATH=save_path
    )
    # BUG FIX: `os` was used here but never imported at module level.
    os.system(command)
26
+
27
+
28
def download_required_files():
    """Fetch every artifact in FILE_PATHS, one download per worker process.

    Prepares the environment for running this module standalone (i.e.
    without a backend that already provides the preprocessed files).

    Returns
    -------
    str
        A human-readable "ready" status string once all downloads finish.
    """
    workers = []
    # BUG FIX: the original iterated over undefined MODEL_PATHS (NameError);
    # the registry defined in this module is FILE_PATHS.
    for _name, details in FILE_PATHS.items():
        if not os.path.exists(details["path"]):
            os.makedirs(details["path"])
        proc = Process(
            target=download_file,
            args=(details["id"], details["name"], details["path"]),
        )
        workers.append(proc)
        proc.start()

    # Block until every download process has completed.
    for proc in workers:
        proc.join()

    return "Environment Ready!"
46
+
47
def get_similar_images_annoy(t, df, img_index, n=1000, max_dist=1.0):
    """Find up to n Annoy neighbours of df row `img_index`, filtered to the
    same race and gender and an age within +/-5 years of the query row.

    Parameters
    ----------
    t : AnnoyIndex
        Built index whose item ids are df row positions.
    df : pandas.DataFrame
        Must have columns 0/1 (vid, face) plus 'race', 'gender', 'age'.
    img_index : int
        Positional row index of the query image.
    n : int
        Maximum number of neighbours to request.
    max_dist : float
        Maximum allowed Annoy (euclidean) distance.

    Returns
    -------
    (vid, face, similar_rows, distances)
        BUG FIX: the original returned `vid, vid` although it unpacked
        `face` from the row and callers name the second value `face`.
        BUG FIX: distances are now filtered in lockstep with the kept ids;
        the original returned the unfiltered list, misaligned with the
        returned rows.
    """
    vid, face = df.iloc[img_index, [0, 1]]
    ids, dists = t.get_nns_by_item(img_index, n + 1, include_distances=True)
    query_age = int(df['age'][img_index])
    kept = [
        (s, d)
        for s, d in zip(ids, dists)
        if (
            d <= max_dist
            and df['race'][s] == df['race'][img_index]
            and df['gender'][s] == df['gender'][img_index]
            and query_age - 5 < int(df['age'][s]) < query_age + 5
        )
    ][1:]  # first hit is always the query image itself
    similar_ids = [s for s, _ in kept]
    similar_dists = [d for _, d in kept]
    return vid, face, df.iloc[similar_ids], similar_dists
53
+
54
def get_sample_n_similar(t, df, sample_idx):
    """Return up to 9 similar-image file names for df row `sample_idx`.

    Parameters
    ----------
    t : AnnoyIndex
        Built index over df['embedding'].
    df : pandas.DataFrame
        Must have 'face' and 'cluster' columns.
    sample_idx : int
        Positional row index of the query image.

    Returns
    -------
    list[str]
        File names like "<stem>.jpg"; the query image itself is dropped.
    """
    output_images = []
    vid, face, similar, distances = get_similar_images_annoy(t, df, sample_idx)
    # Query image first, then its neighbours (zip truncates to the shortest).
    faces = [face] + similar['face'].values.tolist()
    clusters = [df.iloc[sample_idx]['cluster']] + similar['cluster'].values.tolist()
    for face_path, _cluster, _dist in zip(faces, clusters, distances):
        try:
            # face paths look like ".../<stem>.<ext>"; keep "<stem>.jpg".
            output_images.append(f'{face_path.split("/")[-1][:-4]}.jpg')
        except (AttributeError, TypeError):
            # BUG FIX: narrowed the bare `except:` — only skip non-string
            # entries (e.g. NaN), instead of swallowing every error.
            continue
    # Drop the query itself; cap the result at 9 images.
    return output_images[1:10]
65
+
66
def add_to_dataframe(embedding, age, gender, race, dataframe):
    """Prepend a synthetic 'user_image' row carrying the query attributes.

    Builds a one-row frame from the supplied embedding/age/gender/race and
    concatenates it in front of `dataframe`, re-numbering the index.

    Returns
    -------
    pandas.DataFrame
        New frame; the input `dataframe` is not modified in place.
    """
    user_row = {
        'face': "user_image",
        'embedding': embedding,
        'age': age,
        'gender': gender,
        'race': race,
    }
    user_frame = pd.json_normalize(user_row)
    return pd.concat([user_frame, dataframe], sort=False, ignore_index=True)
78
+
79
def get_similar_images(df, embedding, age, gender, race):
    """Index the dataframe plus the user's embedding and query for matches.

    The user's row is prepended (position 0), a euclidean AnnoyIndex is
    built over every 'embedding' cell, and similar images to the user row
    are retrieved.

    Returns
    -------
    (AnnoyIndex, list[str])
        The built index and the similar-image file names.
    """
    df = add_to_dataframe(embedding, age, gender, race, df)

    # Vector dimensionality comes from the first (user) embedding.
    dims = len(df['embedding'][0])
    index = AnnoyIndex(dims, metric='euclidean')
    for item_id, vector in enumerate(df['embedding']):
        index.add_item(item_id, vector)
    index.build(50)

    # Row 0 is the freshly prepended user image.
    return index, get_sample_n_similar(index, df, 0)
93
+
94
if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    # BUG FIX: the original called parse_args() BEFORE add_argument(), so a
    # supplied path was rejected as an unrecognized argument.
    parser.add_argument(
        "image_file_path",
        help="Enter the path of the image file that you need similar images for",
    )
    args = parser.parse_args()
    image_path = str(args.image_file_path)
    # NOTE(review): get_similar_images expects (df, embedding, age, gender,
    # race); calling it with only a path raises TypeError. The embedding and
    # attributes presumably come from DeepFace plus the downloaded dataframe
    # (see download_required_files) — TODO wire that up before shipping.
    get_similar_images(image_path)
similarity_finder/requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
annoy
deepface
numpy
pandas
similarity_finder/shell.nix ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Development shell for the similarity finder (nix-shell).
with import <nixpkgs> { };

let pythonPackages = python310Packages;
in pkgs.mkShell rec {
  name = "similarity-finder-env";
  # Directory where venvShellHook creates/reuses the Python virtualenv.
  venvDir = "./env";
  buildInputs = [
    # C/C++ runtime — presumably needed so native wheels (numpy, annoy)
    # installed by pip can load; TODO confirm.
    stdenv.cc.cc.lib
    stdenv.cc
    pythonPackages.python
    pythonPackages.venvShellHook
  ];

  # Expose the above libraries to binaries run inside the shell.
  LD_LIBRARY_PATH = pkgs.lib.makeLibraryPath buildInputs;

  # Runs once, right after the virtualenv is first created.
  postVenvCreation = ''
    pip install -r requirements.txt
  '';
  }