JoJosmin committed on
Commit
65364e8
·
verified ·
1 Parent(s): 6c6cdc2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +25 -22
app.py CHANGED
@@ -11,6 +11,7 @@ import chromadb
11
  from sklearn.metrics.pairwise import euclidean_distances
12
  from sklearn.preprocessing import normalize
13
  from sklearn.metrics.pairwise import cosine_similarity
 
14
 
15
  # Load segmentation model
16
  segmenter = pipeline(model="mattmdjaga/segformer_b2_clothes")
@@ -44,6 +45,22 @@ def load_image_from_url(url, max_retries=3):
44
  else:
45
  return None
46
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
47
  # 세그먼트 마스크 기반 임베딩 추출
48
  def get_segmented_embedding(img, final_mask):
49
  img_array = np.array(img)
@@ -112,32 +129,18 @@ def segment_clothing(img, clothes=["Hat", "Upper-clothes", "Skirt", "Pants", "Dr
112
 
113
  # return structured_results
114
 
115
- def find_similar_images(query_embedding, collection, top_k=5):
116
- # 데이터베이스 임베딩을 가져옵니다.
117
- results = collection.query(
118
- query_embeddings=query_embedding.reshape(1, -1), # 2D 배열로 변환
119
- n_results=top_k,
120
- include=['metadatas', 'embeddings']
121
- )
122
 
123
- # 메타데이터와 임베딩을 추출합니다.
124
- top_metadatas = results['metadatas'][0]
125
- top_embeddings = np.array(results['embeddings'][0]) # numpy 배열로 변환
126
-
127
- # 쿼리 임베딩 정규화
128
- query_embedding_normalized = normalize(query_embedding.reshape(1, -1), axis=1)
129
-
130
- # 임베딩 정규화
131
- top_embeddings_normalized = normalize(top_embeddings, axis=1)
132
-
133
- # 코사인 유사도 계산
134
- similarities = cosine_similarity(query_embedding_normalized, top_embeddings_normalized).flatten()
135
-
136
  structured_results = []
137
- for metadata, similarity in zip(top_metadatas, similarities):
 
 
138
  structured_results.append({
139
  'info': metadata,
140
- 'similarity': similarity # 유사도는 이미 계산된 값
141
  })
142
 
143
  return structured_results
 
11
  from sklearn.metrics.pairwise import euclidean_distances
12
  from sklearn.preprocessing import normalize
13
  from sklearn.metrics.pairwise import cosine_similarity
14
+ import faiss
15
 
16
  # Load segmentation model
17
  segmenter = pipeline(model="mattmdjaga/segformer_b2_clothes")
 
45
  else:
46
  return None
47
 
48
+ def initialize_faiss_index(collection):
49
+ # 모든 임베딩을 가져와 numpy 배열로 변환
50
+ all_data = collection.get(include=['embeddings', 'metadatas'])
51
+ all_embeddings = np.array(all_data['embeddings']).astype('float32')
52
+ all_metadatas = all_data['metadatas']
53
+
54
+ # faiss 인덱스 생성 및 임베딩 추가
55
+ dimension = all_embeddings.shape[1]
56
+ index = faiss.IndexFlatIP(dimension) # ์ฝ”์‚ฌ์ธ ์œ ์‚ฌ๋„๋ฅผ ์‚ฌ์šฉํ•˜๋ ค๋ฉด IndexFlatIP๋ฅผ ์‚ฌ์šฉ
57
+ index.add(all_embeddings)
58
+
59
+ return index, all_metadatas
60
+
61
+ faiss_index, all_metadatas = initialize_faiss_index(collection)
62
+
63
+
64
  # 세그먼트 마스크 기반 임베딩 추출
65
  def get_segmented_embedding(img, final_mask):
66
  img_array = np.array(img)
 
129
 
130
  # return structured_results
131
 
132
+ def find_similar_images(query_embedding, faiss_index, all_metadatas, top_k=5):
133
+ query_embedding = query_embedding.astype('float32').reshape(1, -1) # 차원 조정 및 형변환
134
+ _, indices = faiss_index.search(query_embedding, top_k) # 유사한 벡터의 인덱스를 반환
 
 
 
 
135
 
136
+ # 인덱스를 이용해 메타데이터와 유사도 가져오기
 
 
 
 
 
 
 
 
 
 
 
 
137
  structured_results = []
138
+ for idx in indices[0]:
139
+ metadata = all_metadatas[idx]
140
+ similarity = cosine_similarity(query_embedding, faiss_index.reconstruct(idx).reshape(1, -1))[0][0]
141
  structured_results.append({
142
  'info': metadata,
143
+ 'similarity': similarity
144
  })
145
 
146
  return structured_results