goodmodeler committed on
Commit
a7af970
·
1 Parent(s): c99bc7a

UPDATE: rag

Browse files
retrieval_augmented_generation/build_embeddings.py CHANGED
@@ -1,261 +1,246 @@
1
  #!/usr/bin/env python3
2
  """
3
- 使用BERT + FAISS构建产品描述和Slogan的嵌入数据库
4
- 支持相似性搜索和检索
 
5
  """
6
 
7
- import faiss
8
  import numpy as np
9
- import pandas as pd
 
10
  from sentence_transformers import SentenceTransformer
11
  from datasets import Dataset
12
- import pickle
13
- import json
14
- from typing import List, Dict, Tuple
15
- import os
16
 
17
class SloganEmbeddingDB:
    """Embedding database of business descriptions and slogans (BERT + FAISS).

    Every record is embedded from the text "<business> <category> <description>",
    scaled to unit length and added to a flat inner-product FAISS index, so the
    scores returned by a search are cosine similarities. ``self.data[i]`` is the
    record that belongs to row ``i`` of the index.
    """

    def __init__(self, model_name: str = "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"):
        """Load the sentence encoder and create an empty index.

        Args:
            model_name: Name of the sentence-transformers model to load. The
                default was chosen as a multilingual model for Chinese and
                English text.
        """
        print(f"📥 Loading BERT model: {model_name}")
        # Remember the name so save_database() does not have to dig it out of
        # the model's private module tree.
        self.model_name = model_name
        self.model = SentenceTransformer(model_name)
        self.dimension = self.model.get_sentence_embedding_dimension()

        # Inner-product index; combined with unit-length vectors this ranks by
        # cosine similarity.
        self.index = faiss.IndexFlatIP(self.dimension)
        self.data = []  # original records, aligned with the index rows

        print(f" Model loaded. Embedding dimension: {self.dimension}")

    @staticmethod
    def _make_record(business: str, category: str, description: str, slogan: str) -> Dict:
        """Build the stored record, including the text that gets embedded."""
        return {
            "business": business,
            "category": category,
            "description": description,
            "slogan": slogan,
            "combined_text": f"{business} {category} {description}",
        }

    def _encode(self, texts: List[str], show_progress_bar: bool = False):
        """Encode ``texts`` into unit-length float32 vectors."""
        vectors = np.asarray(
            self.model.encode(texts, show_progress_bar=show_progress_bar),
            dtype="float32",
        )
        norms = np.linalg.norm(vectors, axis=1, keepdims=True)
        # A zero vector would otherwise turn into NaN and poison the index.
        norms[norms == 0] = 1.0
        return vectors / norms

    def create_sample_dataset(self) -> "Dataset":
        """Create the built-in sample dataset of brands, products and slogans."""
        sample_data = [
            # Chinese-market brands
            {"business": "肯德基", "category": "快餐", "description": "美式炸鸡快餐连锁", "slogan": "有了肯德基生活好滋味"},
            {"business": "麦当劳", "category": "快餐", "description": "全球知名汉堡快餐", "slogan": "我就喜欢"},
            {"business": "星巴克", "category": "咖啡", "description": "全球连锁咖啡店", "slogan": "启发并滋润人类精神"},
            {"business": "小米", "category": "电子产品", "description": "智能手机和科技产品", "slogan": "让每个人都能享受科技的乐趣"},
            {"business": "华为", "category": "电子产品", "description": "通信设备和智能手机", "slogan": "构建万物互联的智能世界"},

            # English-language brands
            {"business": "Nike", "category": "运动用品", "description": "Athletic footwear and apparel", "slogan": "Just Do It"},
            {"business": "Apple", "category": "科技", "description": "Consumer electronics and software", "slogan": "Think Different"},
            {"business": "Coca-Cola", "category": "饮料", "description": "Carbonated soft drinks", "slogan": "Open Happiness"},
            {"business": "BMW", "category": "汽车", "description": "Luxury automobiles", "slogan": "The Ultimate Driving Machine"},
            {"business": "Amazon", "category": "电商", "description": "E-commerce and cloud services", "slogan": "Earth's Most Customer-Centric Company"},

            # Generic product descriptions
            {"business": "智能手表", "category": "可穿戴设备", "description": "健康监测和通知功能的智能手表", "slogan": "时刻关注您的健康"},
            {"business": "电动汽车", "category": "新能源汽车", "description": "零排放环保电动车", "slogan": "绿色出行,智享未来"},
            {"business": "在线教育平台", "category": "教育科技", "description": "AI驱动的个性化学习平台", "slogan": "让学习更智能"},
            {"business": "健身APP", "category": "健康应用", "description": "AI私教健身指导应用", "slogan": "随时随地,专业健身"},
            {"business": "外卖平台", "category": "生活服务", "description": "快速便捷的餐食配送服务", "slogan": "美食到家,生活更美好"},
        ]

        return Dataset.from_pandas(pd.DataFrame(sample_data))

    def build_embeddings(self, dataset: "Dataset"):
        """Embed every item of ``dataset`` and append it to the FAISS index.

        Args:
            dataset: Iterable of mappings with the keys ``business``,
                ``category``, ``description`` and ``slogan``.
        """
        print("🔨 Building embeddings and FAISS index...")

        records = [
            self._make_record(item["business"], item["category"], item["description"], item["slogan"])
            for item in dataset
        ]
        if not records:
            # Nothing to embed; leave index and data untouched.
            print("⚠️ Dataset is empty, nothing to index")
            return

        texts = [record["combined_text"] for record in records]

        print(f"📊 Generating embeddings for {len(texts)} items...")
        embeddings = self._encode(texts, show_progress_bar=True)

        # Only extend self.data once the vectors are in the index, so the two
        # stay aligned even if encoding raises.
        self.index.add(embeddings)
        self.data.extend(records)

        print(f"✅ Built FAISS index with {self.index.ntotal} vectors")

    def search_similar(self, query: str, top_k: int = 5) -> List[Dict]:
        """Return up to ``top_k`` stored records most similar to ``query``.

        Each result is a copy of the stored record with two extra keys:
        ``similarity_score`` (float) and ``rank`` (1-based position).
        An empty list is returned when ``top_k`` is not positive or the index
        is empty.
        """
        print(f"🔍 Searching for: '{query}'")

        if top_k <= 0 or self.index.ntotal == 0:
            return []

        query_embedding = self._encode([query])
        scores, indices = self.index.search(query_embedding, min(top_k, self.index.ntotal))

        results = []
        for score, idx in zip(scores[0], indices[0]):
            # FAISS pads missing neighbours with label -1; without the lower
            # bound that would silently return the last record.
            if not 0 <= idx < len(self.data):
                continue
            result = dict(self.data[idx])
            result["similarity_score"] = float(score)
            result["rank"] = len(results) + 1
            results.append(result)

        return results

    def save_database(self, save_path: str = "./slogan_db"):
        """Write the index, the records and a small config file to ``save_path``."""
        os.makedirs(save_path, exist_ok=True)

        # FAISS index
        faiss.write_index(self.index, os.path.join(save_path, "faiss.index"))

        # Records
        with open(os.path.join(save_path, "data.pkl"), "wb") as f:
            pickle.dump(self.data, f)

        # Config
        config = {
            "model_name": self.model_name,
            "dimension": self.dimension,
            "total_items": len(self.data),
        }
        with open(os.path.join(save_path, "config.json"), "w", encoding="utf-8") as f:
            json.dump(config, f, ensure_ascii=False, indent=2)

        print(f"💾 Database saved to {save_path}")

    def load_database(self, load_path: str = "./slogan_db"):
        """Replace the in-memory index and records with those in ``load_path``.

        Raises:
            ValueError: If the stored index and records disagree in size, or
                the index dimension does not match the loaded model.
        """
        print(f"📂 Loading database from {load_path}")

        index = faiss.read_index(os.path.join(load_path, "faiss.index"))

        # SECURITY: pickle can execute arbitrary code while loading. Only load
        # databases that were written by a trusted save_database() call.
        with open(os.path.join(load_path, "data.pkl"), "rb") as f:
            data = pickle.load(f)

        if index.ntotal != len(data):
            raise ValueError(
                f"Corrupt database: index has {index.ntotal} vectors but data has {len(data)} items"
            )
        if index.d != self.dimension:
            raise ValueError(
                f"Index dimension {index.d} does not match model dimension {self.dimension}"
            )

        # Swap state in only after everything validated.
        self.index = index
        self.data = data

        print(f"✅ Loaded database with {len(self.data)} items")

    def add_new_item(self, business: str, category: str, description: str, slogan: str):
        """Embed a single new record and append it to the index and the data."""
        record = self._make_record(business, category, description, slogan)

        embedding = self._encode([record["combined_text"]])

        self.index.add(embedding)
        self.data.append(record)

        print(f"➕ Added new item: {business}")

    def generate_slogan_suggestions(self, business_description: str, top_k: int = 3) -> List[Dict]:
        """Suggest slogans by looking up the most similar stored businesses.

        Returns:
            One dict per match with the keys ``slogan``, ``reference``
            ("<business> (<category>)") and ``similarity``.
        """
        return [
            {
                "slogan": item["slogan"],
                "reference": f"{item['business']} ({item['category']})",
                "similarity": item["similarity_score"],
            }
            for item in self.search_similar(business_description, top_k)
        ]
186
-
187
def main():
    """Demo: build or load the database, then run searches, suggestions and an insert."""

    def banner(title):
        # Section header framed by two 60-character rules.
        rule = "=" * 60
        print("\n" + rule)
        print(title)
        print(rule)

    db = SloganEmbeddingDB()

    # Reuse a saved database when present; otherwise build one and persist it.
    if not os.path.exists("./slogan_db"):
        print("🆕 Creating new database...")
        db.build_embeddings(db.create_sample_dataset())
        db.save_database()
    else:
        print("📂 Found existing database, loading...")
        db.load_database()

    # --- similarity search demo ---
    test_queries = (
        "智能穿戴设备健康监测",
        "环保新能源汽车",
        "人工智能学习平台",
        "美式快餐炸鸡",
        "luxury sports car",
        "mobile phone technology",
    )

    banner("🔍 SEARCH RESULTS")

    for text in test_queries:
        print(f"\n🔍 Query: {text}")
        for hit in db.search_similar(text, top_k=3):
            print(f" {hit['rank']}. {hit['business']} ({hit['category']})")
            print(f" 描述: {hit['description']}")
            print(f" Slogan: {hit['slogan']}")
            print(f" 相似度: {hit['similarity_score']:.3f}")
            print()

    # --- slogan suggestion demo ---
    banner("💡 SLOGAN SUGGESTIONS")

    new_business = "AI智能音箱语音助手设备"
    print(f"\n💡 为 '{new_business}' 生成Slogan建议:")

    position = 0
    for idea in db.generate_slogan_suggestions(new_business):
        position += 1
        print(f" {position}. \"{idea['slogan']}\"")
        print(f" 参考: {idea['reference']}")
        print(f" 相似度: {idea['similarity']:.3f}")
        print()

    # --- dynamic insertion demo ---
    banner("➕ ADDING NEW ITEM")

    db.add_new_item(
        business="智能眼镜",
        category="AR设备",
        description="增强现实智能眼镜产品",
        slogan="看见未来,触手可及",
    )

    # Search again to show that the freshly added item is retrievable.
    print("\n🔍 搜索 'AR增强现实产品':")
    for hit in db.search_similar("AR增强现实产品", top_k=2):
        print(f" - {hit['business']}: {hit['slogan']} (相似度: {hit['similarity_score']:.3f})")


if __name__ == "__main__":
    main()
 
1
  #!/usr/bin/env python3
2
  """
3
+ 简洁版BERT+FAISS标语数据库
4
+ 输入:产品/业务描述
5
+ 输出:匹配的广告标语
6
  """
7
 
 
8
  import numpy as np
9
+ import faiss
10
+ import json
11
  from sentence_transformers import SentenceTransformer
12
  from datasets import Dataset
13
+ import pandas as pd
 
 
 
14
 
15
class SloganDatabase:
    """Small BERT + FAISS slogan database.

    Input is a product/business description, output is the stored records
    (brand, category, description, slogan) whose search text is most similar.
    Vectors are scaled to unit length before they enter the inner-product
    index, so ``similarity_score`` is a cosine similarity.
    """

    def __init__(self, model_name='all-MiniLM-L6-v2'):
        """Load the sentence encoder; the index stays empty until built or loaded.

        Args:
            model_name: sentence-transformers model to load.
                NOTE(review): the dataset and the demo queries contain Chinese
                text; confirm the default model handles it well enough or
                pass a multilingual model here.
        """
        self.encoder = SentenceTransformer(model_name)
        self.index = None
        self.slogans = []

    def _embed(self, texts, show_progress_bar=False):
        """Encode ``texts`` into unit-length float32 vectors."""
        vectors = np.asarray(
            self.encoder.encode(texts, show_progress_bar=show_progress_bar),
            dtype='float32',
        )
        norms = np.linalg.norm(vectors, axis=1, keepdims=True)
        # Avoid dividing by zero for a degenerate all-zero embedding.
        norms[norms == 0] = 1.0
        return vectors / norms

    def create_dataset(self):
        """Create the slogan dataset (jewellery, watches and luxury goods).

        Returns:
            A list of dicts with the keys ``brand``, ``category``,
            ``description``, ``slogan`` and ``search_text`` (brand, category
            and description joined by single spaces).
        """
        # Rows are [brand, category, description, slogan].
        data = [
            # Top jewellery houses
            ["Tiffany & Co.", "jewelry", "luxury diamond jewelry and engagement rings", "A Diamond is Forever"],
            ["Cartier", "luxury_jewelry", "high-end jewelry watches and accessories", "L'art de vivre"],
            ["Van Cleef & Arpels", "jewelry", "French luxury jewelry and watches", "Poetry of Time"],
            ["Harry Winston", "jewelry", "rare diamonds and luxury jewelry", "Rare Jewels of the World"],
            ["Bulgari", "jewelry", "Italian luxury jewelry and watches", "Italian Excellence"],
            ["Chopard", "jewelry", "Swiss luxury jewelry and watches", "Happy Diamonds"],
            ["Graff", "jewelry", "exceptional diamonds and jewelry", "The Most Fabulous Jewels in the World"],
            ["Piaget", "jewelry", "Swiss luxury watches and jewelry", "Possession"],
            ["Boucheron", "jewelry", "French high jewelry and luxury watches", "Le Joaillier Depuis 1858"],
            ["Mikimoto", "jewelry", "cultured pearl jewelry", "The Originator of Cultured Pearls"],

            # Luxury fashion houses
            ["Louis Vuitton", "luxury_fashion", "luxury leather goods and fashion", "The Art of Travel"],
            ["Hermès", "luxury_fashion", "French luxury goods and accessories", "Luxury in the making"],
            ["Chanel", "luxury_fashion", "haute couture and luxury fashion", "Inside every woman there is a flower and a cat"],
            ["Gucci", "luxury_fashion", "Italian luxury fashion and accessories", "Quality is remembered long after price is forgotten"],
            ["Prada", "luxury_fashion", "Italian luxury fashion house", "Prada"],
            ["Dior", "luxury_fashion", "French luxury fashion and beauty", "Miss Dior"],
            ["Versace", "luxury_fashion", "Italian luxury fashion design", "Virtus"],
            ["Saint Laurent", "luxury_fashion", "French luxury fashion house", "Saint Laurent Paris"],
            ["Balenciaga", "luxury_fashion", "Spanish luxury fashion house", "Balenciaga"],
            ["Bottega Veneta", "luxury_fashion", "Italian luxury leather goods", "When your own initials are enough"],

            # Watch brands
            ["Rolex", "luxury_watches", "Swiss luxury watches and timepieces", "Perpetual, Spirit of Excellence"],
            ["Patek Philippe", "luxury_watches", "Swiss luxury watch manufacturer", "You never actually own a Patek Philippe"],
            ["Audemars Piguet", "luxury_watches", "Swiss luxury watch brand", "To break the rules, you must first master them"],
            ["Omega", "luxury_watches", "Swiss luxury watch manufacturer", "Precision"],
            ["TAG Heuer", "luxury_watches", "Swiss luxury watches", "Don't crack under pressure"],
            ["Breitling", "luxury_watches", "Swiss luxury watchmaker", "Instruments for Professionals"],
            ["IWC", "luxury_watches", "Swiss luxury watch company", "Engineered for men"],
            ["Jaeger-LeCoultre", "luxury_watches", "Swiss luxury watch manufacturer", "The World's Most Complicated Watches"],
            ["Vacheron Constantin", "luxury_watches", "Swiss luxury watch manufacturer", "One of Not Many"],
            ["A. Lange & Söhne", "luxury_watches", "German luxury watch manufacturer", "When nothing else will do"],

            # Fashion jewellery
            ["Pandora", "fashion_jewelry", "Danish jewelry brand charm bracelets", "Be Love"],
            ["Swarovski", "fashion_jewelry", "Austrian crystal jewelry and accessories", "Unleash Your Light"],
            ["Daniel Wellington", "fashion_watches", "Swedish watch brand minimalist design", "Live the moment"],
            ["Alex and Ani", "fashion_jewelry", "American jewelry brand spiritual bracelets", "Positive Energy"],
            ["Kendra Scott", "fashion_jewelry", "American jewelry designer colorful stones", "Live colorfully"],
            ["Monica Vinader", "fashion_jewelry", "British jewelry brand contemporary design", "Everyday luxury"],
            ["Mejuri", "fashion_jewelry", "Canadian jewelry brand everyday luxury", "Everyday fine"],
            ["Gorjana", "fashion_jewelry", "California jewelry brand layered necklaces", "Live your layer"],
            ["Kate Spade", "fashion_jewelry", "American fashion accessories jewelry", "Live colorfully"],
            ["Marc Jacobs", "fashion_jewelry", "American fashion designer accessories", "Marc Jacobs"],

            # Bespoke / diamond jewellery
            ["Blue Nile", "diamond_jewelry", "online diamond jewelry retailer", "Extraordinary diamonds for extraordinary moments"],
            ["James Allen", "diamond_jewelry", "online engagement ring retailer", "See it. Love it. Own it."],
            ["Brilliant Earth", "diamond_jewelry", "ethical diamond jewelry", "Brilliant Earth"],
            ["With Clarity", "diamond_jewelry", "lab-grown diamond jewelry", "Diamonds. Redefined."],
            ["Clean Origin", "diamond_jewelry", "lab-created diamond jewelry", "Grown with love"],
            ["Ritani", "diamond_jewelry", "engagement rings and wedding bands", "Love is in the details"],
            ["Vrai", "diamond_jewelry", "lab-grown diamond jewelry", "Created, not mined"],
            ["Catbird", "jewelry", "Brooklyn-based jewelry designer", "Made in Brooklyn"],
            ["Wwake", "jewelry", "contemporary fine jewelry designer", "Wwake"],
            ["Jacquie Aiche", "jewelry", "California jewelry designer bohemian luxury", "Jacquie Aiche"],

            # Chinese-market jewellery brands
            ["周大福", "jewelry", "香港珠宝品牌黄金钻石", "心意足金"],
            ["周生生", "jewelry", "香港珠宝品牌传统工艺", "传承经典"],
            ["老凤祥", "jewelry", "中国传统珠宝品牌黄金首饰", "老凤祥,真金不怕火炼"],
            ["六福珠宝", "jewelry", "香港珠宝品牌时尚设计", "六福临门"],
            ["潘多拉", "jewelry", "丹麦珠宝品牌串珠手链", "表达你的故事"],
            ["周大生", "jewelry", "中国珠宝品牌钻石首饰", "爱就在一起"],
            # The description below had mis-encoded characters; restored to "珠宝".
            ["金伯利", "jewelry", "中国钻石珠宝品牌", "只为更好的你"],
            ["戴比尔斯", "diamond_jewelry", "钻石开采珠宝品牌", "钻石恒久远,一颗永流传"],
            ["施华洛世奇", "crystal_jewelry", "奥地利水晶珠宝品牌", "释放你的光芒"],
            ["谢瑞麟", "jewelry", "香港珠宝设计师品牌", "艺术珠宝"],

            # Luxury accessories
            ["Goyard", "luxury_accessories", "French luxury leather goods", "Goyard"],
            ["Moynat", "luxury_accessories", "French luxury leather goods", "Moynat"],
            ["Berluti", "luxury_accessories", "French luxury leather goods", "Berluti"],
            ["Valextra", "luxury_accessories", "Italian luxury leather goods", "Milanese excellence since 1937"],
            ["Loewe", "luxury_accessories", "Spanish luxury leather goods", "Craft"],
            ["Brunello Cucinelli", "luxury_fashion", "Italian luxury fashion cashmere", "Humanistic Enterprise"],
            ["Loro Piana", "luxury_fashion", "Italian luxury textile and clothing", "Excellence in natural fibers"],
            ["Kiton", "luxury_fashion", "Italian luxury menswear", "The most beautiful thing made by man"],
            ["Zegna", "luxury_fashion", "Italian luxury menswear", "What makes a man"],
            ["Brioni", "luxury_fashion", "Italian luxury menswear", "Roman style"],

            # Emerging luxury brands
            ["Jacquemus", "luxury_fashion", "French luxury fashion house", "La Montagne"],
            ["Ganni", "luxury_fashion", "Danish fashion brand", "Ganni"],
            ["Staud", "luxury_fashion", "American fashion brand", "Staud"],
            ["Cult Gaia", "luxury_accessories", "American accessories brand", "Cult Gaia"],
            ["Rosantica", "jewelry", "Italian jewelry brand", "Rosantica"],
            ["Alighieri", "jewelry", "British jewelry brand", "The Inferno"],
            ["Lizzie Fortunato", "jewelry", "American jewelry brand", "Lizzie Fortunato"],
            ["Aurate", "jewelry", "American jewelry brand", "Accessible luxury"],
            ["AUrate New York", "jewelry", "New York jewelry brand", "Radically responsible luxury"],
            ["Missoma", "jewelry", "British jewelry brand", "Missoma"],
        ]

        # Plain dicts give the same records as the former DataFrame round-trip
        # without needing pandas for a handful of string rows.
        fields = ('brand', 'category', 'description', 'slogan')
        records = []
        for row in data:
            record = dict(zip(fields, row))
            # Text that is embedded: brand + category + description.
            record['search_text'] = ' '.join(row[:3])
            records.append(record)
        return records

    def build_index(self, data):
        """Embed ``data`` and build a fresh FAISS index over it.

        Args:
            data: Records as returned by :meth:`create_dataset`; each needs a
                ``search_text`` key.

        Raises:
            ValueError: If ``data`` is empty.
        """
        print("🔨 Building FAISS index...")

        if not data:
            raise ValueError("Cannot build an index from an empty dataset")

        texts = [item['search_text'] for item in data]

        embeddings = self._embed(texts, show_progress_bar=True)

        # Take the dimension from the embeddings instead of hard-coding 384,
        # so any encoder model works.
        index = faiss.IndexFlatIP(embeddings.shape[1])
        index.add(embeddings)

        self.index = index
        self.slogans = data

        print(f"✅ Index built with {len(data)} slogans")

    def search(self, query, k=5):
        """Return up to ``k`` records most similar to ``query``.

        Each result is a copy of the stored record plus ``similarity_score``.

        Raises:
            ValueError: If no index has been built or loaded yet.
        """
        if self.index is None:
            raise ValueError("Index not built yet!")

        k = min(k, self.index.ntotal)
        if k <= 0:
            return []

        query_embedding = self._embed([query])

        scores, indices = self.index.search(query_embedding, k)

        results = []
        for score, idx in zip(scores[0], indices[0]):
            # FAISS uses label -1 for "no neighbour"; the lower bound keeps it
            # from being read as the last list element.
            if 0 <= idx < len(self.slogans):
                result = self.slogans[idx].copy()
                result['similarity_score'] = float(score)
                results.append(result)

        return results

    def save(self, path="slogan_db"):
        """Write ``<path>.faiss`` (index) and ``<path>.json`` (records).

        Raises:
            ValueError: If no index has been built or loaded yet.
        """
        if self.index is None:
            raise ValueError("Index not built yet!")

        faiss.write_index(self.index, f"{path}.faiss")

        with open(f"{path}.json", 'w', encoding='utf-8') as f:
            json.dump(self.slogans, f, ensure_ascii=False, indent=2)

        print(f"💾 Database saved to {path}")

    def load(self, path="slogan_db"):
        """Load ``<path>.faiss`` and ``<path>.json``.

        Returns:
            True on success. False when the files are missing, unreadable or
            inconsistent; in that case the current state is left untouched.
        """
        try:
            index = faiss.read_index(f"{path}.faiss")
            with open(f"{path}.json", 'r', encoding='utf-8') as f:
                slogans = json.load(f)
        except (OSError, RuntimeError, ValueError) as exc:
            # RuntimeError: FAISS could not read the index file;
            # ValueError covers malformed JSON.
            print(f"❌ Failed to load database from {path}: {exc}")
            return False

        if index.ntotal != len(slogans):
            print(f"❌ Failed to load database from {path}: index and data sizes differ")
            return False

        # Commit only after both files were read and validated.
        self.index = index
        self.slogans = slogans

        print(f"📂 Database loaded from {path}")
        return True
+
197
def main():
    """Entry point: load or create the slogan database, then run sample searches."""
    print("🚀 Creating Slogan Database...")

    db = SloganDatabase()

    # Fall back to building (and persisting) a fresh database when nothing
    # could be loaded from disk.
    loaded = db.load()
    if not loaded:
        print("📊 Creating new database...")
        records = db.create_dataset()
        db.build_index(records)
        db.save()

    # Sample queries in Chinese and English.
    test_queries = (
        "钻石订婚戒指",
        "奢侈品手袋",
        "瑞士手表品牌",
        "珍珠首饰",
        "黄金项链",
        "时尚耳环",
        "luxury jewelry brand",
        "designer handbag",
        "crystal accessories",
        "wedding rings",
    )

    print("\n🔍 Testing searches...")
    divider = "-" * 40
    for text in test_queries:
        print(f"\n查询: {text}")
        print(divider)

        position = 0
        for hit in db.search(text, k=3):
            position += 1
            print(f"{position}. {hit['brand']} ({hit['category']})")
            print(f" 描述: {hit['description']}")
            print(f" 标语: {hit['slogan']}")
            print(f" 相似度: {hit['similarity_score']:.3f}")
            print()


if __name__ == "__main__":
    main()