Spaces:

99ashutosh
/

find-similar-image

Runtime error

App Files Files Community

find-similar-image / similarity_finder /get_similar_images.py

99ashutosh

final fix

8ce051d almost 2 years ago

raw

history blame contribute delete

3.96 kB

	import argparse
	import os
	import shlex
	from multiprocessing import Process

	import numpy as np
	import pandas as pd
	from annoy import AnnoyIndex
	from deepface import DeepFace

	# Directory shared by both entries below.
	_PREPROCESSED_DIR = "preprocessed_files"

	# Files this module expects on disk. Each entry records:
	#   "id"   - remote file identifier (presumably the Google Drive id that
	#            download_file() puts into its docs.google.com URL; confirm)
	#   "name" - file name to save the download under
	#   "path" - directory the file is saved into
	FILE_PATHS = dict(
	    dataframe=dict(
	        id="1HFxHX2RkEr7_yVHnA-qk5Lj8CxOWrUda",
	        name="final_embeddings_clusters.parquet.gzip",
	        path=_PREPROCESSED_DIR,
	    ),
	    AnnoyIndex_Saved_File=dict(
	        id="14uIgsVAiGolTy3-TGWrUUXqEzJqh3ZMl",
	        name="CACD2000_refined_images_embeddings_clusters.ann",
	        path=_PREPROCESSED_DIR,
	    ),
	)

	def download_file(file_id, file_name, save_path):
	    """Download one file from docs.google.com with ``wget``.

	    The shell command runs ``wget`` twice: an inner call fetches the
	    download page and ``sed`` extracts the ``confirm=`` token from it; the
	    outer call then requests the file with that token and writes it to
	    ``save_path/file_name``. The temporary cookie jar is removed afterwards.

	    Args:
	        file_id: Identifier placed in the ``id=`` query parameter.
	        file_name: Name of the file to create.
	        save_path: Existing directory to create the file in.

	    Returns:
	        None. The exit status of the shell command is not reported.
	    """
	    # Quote the destination so paths containing spaces or shell
	    # metacharacters reach wget as a single argument.
	    destination = shlex.quote(os.path.join(save_path, file_name))
	    # NOTE: file_id is interpolated into the command unquoted, so it must
	    # come from a trusted source (e.g. the constants in this module).
	    #
	    # The inner wget output has to be piped into sed with a real `|`, and
	    # the sed pattern needs `.*` on both sides so that only the captured
	    # token is printed rather than the rest of the matching line.
	    command = (
	        r"""wget --load-cookies /tmp/cookies.txt "https://docs.google.com/uc?export=download&confirm=$(wget --quiet --save-cookies /tmp/cookies.txt --keep-session-cookies --no-check-certificate 'https://docs.google.com/uc?export=download&id={FILE_ID}' -O- | sed -rn 's/.*confirm=([0-9A-Za-z_]+).*/\1\n/p')&id={FILE_ID}" -O {DESTINATION} && rm -rf /tmp/cookies.txt"""
	    ).format(FILE_ID=file_id, DESTINATION=destination)
	    os.system(command)


	"""
	Prepare env for using this file, use this function
	if running code without backend.
	"""
	def download_required_files():
	programs = []
	for key, details in MODEL_PATHS.items():
	if not os.path.exists(details["path"]):
	os.makedirs(details["path"])
	proc = Process(target=download_file, args=(
	details["id"], details["name"], details["path"],))
	programs.append(proc)
	proc.start()

	for proc in programs:
	proc.join()

	return "Environent Ready!"

	def get_similar_images_annoy(t, df, img_index, n=1000, max_dist=1.0):
	    """Find rows of ``df`` that are close to and demographically like one row.

	    Args:
	        t: Index object exposing ``get_nns_by_item`` (an ``AnnoyIndex`` when
	            called from ``get_similar_images``).
	        df: DataFrame with ``race``, ``gender`` and ``age`` columns whose
	            row labels line up with the item ids stored in ``t``.
	        img_index: Row / item id of the query image.
	        n: Number of neighbours to request in addition to the query itself.
	        max_dist: Largest neighbour distance that is still accepted.

	    Returns:
	        A 4-tuple ``(vid, vid, similar_rows, dist)``: the value in column 0
	        of the query row (twice), the accepted rows of ``df``, and the
	        unfiltered distance list returned by the index.
	    """
	    # NOTE(review): the value from column 1 is unpacked but never returned;
	    # the column-0 value is returned in both of the first two slots.
	    vid, _second_column = df.iloc[img_index, [0, 1]]
	    neighbour_ids, dist = t.get_nns_by_item(
	        img_index, n + 1, include_distances=True
	    )

	    def _matches_query(candidate, distance):
	        """Return True when a neighbour passes every filter."""
	        if not (distance <= max_dist):
	            return False
	        if not (df['race'][candidate] == df['race'][img_index]):
	            return False
	        if not (df['gender'][candidate] == df['gender'][img_index]):
	            return False
	        # Accept only ages strictly less than five years from the query's.
	        age_gap = int(df['age'][candidate]) - int(df['age'][img_index])
	        return -5 < age_gap < 5

	    accepted = [
	        candidate
	        for candidate, distance in zip(neighbour_ids, dist)
	        if _matches_query(candidate, distance)
	    ]
	    # first item is always its own video
	    similar_img_ids = accepted[1:]
	    return vid, vid, df.iloc[similar_img_ids], dist

	def get_sample_n_similar(t, df, sample_idx):
	    """Return ``.jpg`` file names for up to nine images similar to one row.

	    Args:
	        t: Index object passed through to ``get_similar_images_annoy``.
	        df: DataFrame passed through to ``get_similar_images_annoy``; the
	            rows it returns must have a ``face`` column.
	        sample_idx: Row / item id of the query image.

	    Returns:
	        A list of at most nine strings. Each is the last ``/``-separated
	        component of a similar row's ``face`` value with its final four
	        characters replaced by ``.jpg``. The query image itself is never
	        included.
	    """
	    _vid, _face, similar, _distances = get_similar_images_annoy(
	        t, df, sample_idx
	    )

	    output_images = []
	    for face_path in similar['face'].values.tolist():
	        # Skip entries that are not path strings (e.g. missing values)
	        # instead of hiding every possible error behind a bare except.
	        if not isinstance(face_path, str):
	            continue
	        file_name = face_path.split("/")[-1]
	        output_images.append(f'{file_name[:-4]}.jpg')

	    # Only similar rows are collected above, so the query never has to be
	    # removed by position; an unusable entry can no longer shift that
	    # position and cause a genuine match to be dropped.
	    return output_images[:9]

	def add_to_dataframe(image_path, age, gender, race, dataframe):
	    """Prepend a row describing the user's image to ``dataframe``.

	    Args:
	        image_path: Image handed to ``DeepFace.represent``.
	        age: Value stored in the new row's ``age`` column.
	        gender: Value stored in the new row's ``gender`` column.
	        race: Value stored in the new row's ``race`` column.
	        dataframe: Existing rows; not modified.

	    Returns:
	        A new DataFrame whose first row is the user's image (``face`` set
	        to ``"user_image"``, ``embedding`` taken from the first result of
	        ``DeepFace.represent``) followed by the rows of ``dataframe``,
	        with the index renumbered from zero.
	    """
	    user_record = {'face': "user_image"}
	    representations = DeepFace.represent(img_path=image_path)
	    user_record['embedding'] = representations[0]["embedding"]
	    user_record.update(age=age, gender=gender, race=race)

	    user_row = pd.json_normalize(user_record)
	    return pd.concat([user_row, dataframe], sort=False, ignore_index=True)

	def get_similar_images(df, image_path, age, gender, race, n_trees=50):
	    """Find stored images that look like the image at ``image_path``.

	    The user's image is added to the data as row 0, an Annoy index using
	    the euclidean metric is built over every embedding, and row 0 is then
	    used as the query.

	    Args:
	        df: DataFrame with an ``embedding`` column; not modified.
	        image_path: Path of the user's image.
	        age: Age recorded for the user's image.
	        gender: Gender recorded for the user's image.
	        race: Race recorded for the user's image.
	        n_trees: Number of trees passed to ``AnnoyIndex.build``.

	    Returns:
	        The list of file names produced by ``get_sample_n_similar``.
	    """
	    # add_to_dataframe places the user's image first, so it is item 0.
	    df = add_to_dataframe(image_path, age, gender, race, df)

	    dimension = len(df['embedding'][0])
	    index = AnnoyIndex(dimension, metric='euclidean')
	    for item_id, vector in enumerate(df['embedding']):
	        index.add_item(item_id, vector)
	    index.build(n_trees)

	    return get_sample_n_similar(index, df, 0)

	if __name__ == '__main__':
	    # Command-line entry point: print the file names of images similar to
	    # the given one, one per line.
	    default_dataframe = os.path.join(
	        FILE_PATHS["dataframe"]["path"], FILE_PATHS["dataframe"]["name"]
	    )

	    parser = argparse.ArgumentParser()
	    # Every argument must be registered before parse_args() is called;
	    # parsing first rejects the positional argument as unrecognised.
	    parser.add_argument(
	        "image_file_path",
	        help="Enter the path of the image file that you need similar images for",
	    )
	    # get_similar_images needs these values as well as the image path.
	    parser.add_argument(
	        "--age", type=int, required=True,
	        help="Age of the person in the image",
	    )
	    parser.add_argument(
	        "--gender", required=True,
	        help="Gender label, spelled as in the dataframe's gender column",
	    )
	    parser.add_argument(
	        "--race", required=True,
	        help="Race label, spelled as in the dataframe's race column",
	    )
	    parser.add_argument(
	        "--dataframe", default=default_dataframe,
	        help="Parquet file holding the stored embeddings (default: %(default)s)",
	    )
	    args = parser.parse_args()

	    if not os.path.exists(args.dataframe):
	        parser.error(f"dataframe file not found: {args.dataframe}")

	    embeddings = pd.read_parquet(args.dataframe)
	    image_path = str(args.image_file_path)
	    for image_name in get_similar_images(
	        embeddings, image_path, args.age, args.gender, args.race
	    ):
	        print(image_name)