Spaces:
Running
Running
#!/usr/bin/env python3 | |
""" | |
简洁版BERT+FAISS标语数据库 | |
输入:产品/业务描述 | |
输出:匹配的广告标语 | |
""" | |
import numpy as np | |
import faiss | |
import json | |
from sentence_transformers import SentenceTransformer | |
from datasets import Dataset | |
import pandas as pd | |
class SloganDatabase:
    """Semantic-search database that maps a product description to slogans.

    Brand records are embedded with a SentenceTransformer model and stored
    in a flat FAISS inner-product index; the record metadata is kept in a
    parallel list so that FAISS row ``i`` corresponds to ``self.slogans[i]``.
    """

    def __init__(self, model_name='all-MiniLM-L6-v2'):
        """Load the sentence encoder and start with an empty database.

        Args:
            model_name: Name or path handed to ``SentenceTransformer``.
                Defaults to the model the database was originally built with.
        """
        self.encoder = SentenceTransformer(model_name)
        # FAISS index; stays None until build_index() or load() succeeds.
        self.index = None
        # Record dicts, positionally aligned with the rows of the index.
        self.slogans = []

    def create_dataset(self):
        """Create the slogan dataset (jewelry and luxury-goods domain).

        Returns:
            A list of dicts with the keys ``brand``, ``category``,
            ``description``, ``slogan`` and ``search_text`` (the first three
            fields joined with single spaces; this is the text that gets
            embedded).
        """
        # Sample rows: [brand, category, description, slogan]
        data = [
            # Top-tier jewelry houses
            ["Tiffany & Co.", "jewelry", "luxury diamond jewelry and engagement rings", "A Diamond is Forever"],
            ["Cartier", "luxury_jewelry", "high-end jewelry watches and accessories", "L'art de vivre"],
            ["Van Cleef & Arpels", "jewelry", "French luxury jewelry and watches", "Poetry of Time"],
            ["Harry Winston", "jewelry", "rare diamonds and luxury jewelry", "Rare Jewels of the World"],
            ["Bulgari", "jewelry", "Italian luxury jewelry and watches", "Italian Excellence"],
            ["Chopard", "jewelry", "Swiss luxury jewelry and watches", "Happy Diamonds"],
            ["Graff", "jewelry", "exceptional diamonds and jewelry", "The Most Fabulous Jewels in the World"],
            ["Piaget", "jewelry", "Swiss luxury watches and jewelry", "Possession"],
            ["Boucheron", "jewelry", "French high jewelry and luxury watches", "Le Joaillier Depuis 1858"],
            ["Mikimoto", "jewelry", "cultured pearl jewelry", "The Originator of Cultured Pearls"],
            # Luxury fashion houses
            ["Louis Vuitton", "luxury_fashion", "luxury leather goods and fashion", "The Art of Travel"],
            ["Hermès", "luxury_fashion", "French luxury goods and accessories", "Luxury in the making"],
            ["Chanel", "luxury_fashion", "haute couture and luxury fashion", "Inside every woman there is a flower and a cat"],
            ["Gucci", "luxury_fashion", "Italian luxury fashion and accessories", "Quality is remembered long after price is forgotten"],
            ["Prada", "luxury_fashion", "Italian luxury fashion house", "Prada"],
            ["Dior", "luxury_fashion", "French luxury fashion and beauty", "Miss Dior"],
            ["Versace", "luxury_fashion", "Italian luxury fashion design", "Virtus"],
            ["Saint Laurent", "luxury_fashion", "French luxury fashion house", "Saint Laurent Paris"],
            ["Balenciaga", "luxury_fashion", "Spanish luxury fashion house", "Balenciaga"],
            ["Bottega Veneta", "luxury_fashion", "Italian luxury leather goods", "When your own initials are enough"],
            # Watch brands
            ["Rolex", "luxury_watches", "Swiss luxury watches and timepieces", "Perpetual, Spirit of Excellence"],
            ["Patek Philippe", "luxury_watches", "Swiss luxury watch manufacturer", "You never actually own a Patek Philippe"],
            ["Audemars Piguet", "luxury_watches", "Swiss luxury watch brand", "To break the rules, you must first master them"],
            ["Omega", "luxury_watches", "Swiss luxury watch manufacturer", "Precision"],
            ["TAG Heuer", "luxury_watches", "Swiss luxury watches", "Don't crack under pressure"],
            ["Breitling", "luxury_watches", "Swiss luxury watchmaker", "Instruments for Professionals"],
            ["IWC", "luxury_watches", "Swiss luxury watch company", "Engineered for men"],
            ["Jaeger-LeCoultre", "luxury_watches", "Swiss luxury watch manufacturer", "The World's Most Complicated Watches"],
            ["Vacheron Constantin", "luxury_watches", "Swiss luxury watch manufacturer", "One of Not Many"],
            ["A. Lange & Söhne", "luxury_watches", "German luxury watch manufacturer", "When nothing else will do"],
            # Fashion jewelry
            ["Pandora", "fashion_jewelry", "Danish jewelry brand charm bracelets", "Be Love"],
            ["Swarovski", "fashion_jewelry", "Austrian crystal jewelry and accessories", "Unleash Your Light"],
            ["Daniel Wellington", "fashion_watches", "Swedish watch brand minimalist design", "Live the moment"],
            ["Alex and Ani", "fashion_jewelry", "American jewelry brand spiritual bracelets", "Positive Energy"],
            ["Kendra Scott", "fashion_jewelry", "American jewelry designer colorful stones", "Live colorfully"],
            ["Monica Vinader", "fashion_jewelry", "British jewelry brand contemporary design", "Everyday luxury"],
            ["Mejuri", "fashion_jewelry", "Canadian jewelry brand everyday luxury", "Everyday fine"],
            ["Gorjana", "fashion_jewelry", "California jewelry brand layered necklaces", "Live your layer"],
            ["Kate Spade", "fashion_jewelry", "American fashion accessories jewelry", "Live colorfully"],
            ["Marc Jacobs", "fashion_jewelry", "American fashion designer accessories", "Marc Jacobs"],
            # Custom / diamond jewelry
            ["Blue Nile", "diamond_jewelry", "online diamond jewelry retailer", "Extraordinary diamonds for extraordinary moments"],
            ["James Allen", "diamond_jewelry", "online engagement ring retailer", "See it. Love it. Own it."],
            ["Brilliant Earth", "diamond_jewelry", "ethical diamond jewelry", "Brilliant Earth"],
            ["With Clarity", "diamond_jewelry", "lab-grown diamond jewelry", "Diamonds. Redefined."],
            ["Clean Origin", "diamond_jewelry", "lab-created diamond jewelry", "Grown with love"],
            ["Ritani", "diamond_jewelry", "engagement rings and wedding bands", "Love is in the details"],
            ["Vrai", "diamond_jewelry", "lab-grown diamond jewelry", "Created, not mined"],
            ["Catbird", "jewelry", "Brooklyn-based jewelry designer", "Made in Brooklyn"],
            ["Wwake", "jewelry", "contemporary fine jewelry designer", "Wwake"],
            ["Jacquie Aiche", "jewelry", "California jewelry designer bohemian luxury", "Jacquie Aiche"],
            # Chinese jewelry brands
            ["周大福", "jewelry", "香港珠宝品牌黄金钻石", "心意足金"],
            ["周生生", "jewelry", "香港珠宝品牌传统工艺", "传承经典"],
            ["老凤祥", "jewelry", "中国传统珠宝品牌黄金首饰", "老凤祥,真金不怕火炼"],
            ["六福珠宝", "jewelry", "香港珠宝品牌时尚设计", "六福临门"],
            ["潘多拉", "jewelry", "丹麦珠宝品牌串珠手链", "表达你的故事"],
            ["周大生", "jewelry", "中国珠宝品牌钻石首饰", "爱就在一起"],
            ["金伯利", "jewelry", "中国钻石珠宝品牌", "只为更好的你"],
            ["戴比尔斯", "diamond_jewelry", "钻石开采珠宝品牌", "钻石恒久远,一颗永流传"],
            ["施华洛世奇", "crystal_jewelry", "奥地利水晶珠宝品牌", "释放你的光芒"],
            ["谢瑞麟", "jewelry", "香港珠宝设计师品牌", "艺术珠宝"],
            # Luxury accessories
            ["Goyard", "luxury_accessories", "French luxury leather goods", "Goyard"],
            ["Moynat", "luxury_accessories", "French luxury leather goods", "Moynat"],
            ["Berluti", "luxury_accessories", "French luxury leather goods", "Berluti"],
            ["Valextra", "luxury_accessories", "Italian luxury leather goods", "Milanese excellence since 1937"],
            ["Loewe", "luxury_accessories", "Spanish luxury leather goods", "Craft"],
            ["Brunello Cucinelli", "luxury_fashion", "Italian luxury fashion cashmere", "Humanistic Enterprise"],
            ["Loro Piana", "luxury_fashion", "Italian luxury textile and clothing", "Excellence in natural fibers"],
            ["Kiton", "luxury_fashion", "Italian luxury menswear", "The most beautiful thing made by man"],
            ["Zegna", "luxury_fashion", "Italian luxury menswear", "What makes a man"],
            ["Brioni", "luxury_fashion", "Italian luxury menswear", "Roman style"],
            # Emerging luxury brands
            ["Jacquemus", "luxury_fashion", "French luxury fashion house", "La Montagne"],
            ["Ganni", "luxury_fashion", "Danish fashion brand", "Ganni"],
            ["Staud", "luxury_fashion", "American fashion brand", "Staud"],
            ["Cult Gaia", "luxury_accessories", "American accessories brand", "Cult Gaia"],
            ["Rosantica", "jewelry", "Italian jewelry brand", "Rosantica"],
            ["Alighieri", "jewelry", "British jewelry brand", "The Inferno"],
            ["Lizzie Fortunato", "jewelry", "American jewelry brand", "Lizzie Fortunato"],
            ["Aurate", "jewelry", "American jewelry brand", "Accessible luxury"],
            ["AUrate New York", "jewelry", "New York jewelry brand", "Radically responsible luxury"],
            ["Missoma", "jewelry", "British jewelry brand", "Missoma"]
        ]
        # Convert to a DataFrame so the search text can be built column-wise.
        df = pd.DataFrame(data, columns=['brand', 'category', 'description', 'slogan'])
        # Search text combines the descriptive fields (the slogan is excluded).
        df['search_text'] = df['brand'] + ' ' + df['category'] + ' ' + df['description']
        return df.to_dict('records')

    def build_index(self, data):
        """Embed every record's ``search_text`` and build the FAISS index.

        Args:
            data: Non-empty list of record dicts, each with a ``search_text``
                key (the shape returned by ``create_dataset``).

        Raises:
            ValueError: If ``data`` is empty.
        """
        if not data:
            raise ValueError("Cannot build an index from an empty dataset")
        print("🔨 Building FAISS index...")
        # Extract the text to embed.
        texts = [item['search_text'] for item in data]
        # FAISS requires a C-contiguous float32 matrix.
        embeddings = np.ascontiguousarray(
            self.encoder.encode(texts, show_progress_bar=True), dtype='float32'
        )
        # Inner-product index sized from the encoder output rather than a
        # hard-coded 384, so a different model_name keeps working.
        # NOTE(review): inner product equals cosine similarity only if the
        # encoder emits unit-length vectors; confirm for the chosen model.
        index = faiss.IndexFlatIP(embeddings.shape[1])
        index.add(embeddings)
        # Publish index and metadata together so they never disagree.
        self.index = index
        self.slogans = data
        print(f"✅ Index built with {len(data)} slogans")

    def search(self, query, k=5):
        """Return up to ``k`` records most similar to ``query``.

        Args:
            query: Free-text product or business description.
            k: Maximum number of results requested from FAISS.

        Returns:
            A list of copies of the matching record dicts, each with an added
            ``similarity_score`` float, in the order FAISS returned them.

        Raises:
            ValueError: If no index has been built or loaded yet.
        """
        # Explicit None check: do not rely on the truthiness of a SWIG object.
        if self.index is None:
            raise ValueError("Index not built yet!")
        # Encode the query as a single-row float32 matrix.
        query_embedding = np.ascontiguousarray(
            self.encoder.encode([query]), dtype='float32'
        )
        scores, indices = self.index.search(query_embedding, k)
        results = []
        for score, idx in zip(scores[0], indices[0]):
            # FAISS pads missing neighbours with label -1 when fewer than k
            # vectors are stored; a negative label must not be used as a
            # Python index, or it would silently select the last record.
            if not 0 <= idx < len(self.slogans):
                continue
            result = self.slogans[idx].copy()
            result['similarity_score'] = float(score)
            results.append(result)
        return results

    def save(self, path="slogan_db"):
        """Persist the index to ``{path}.faiss`` and records to ``{path}.json``.

        Args:
            path: File path prefix (no extension).

        Raises:
            ValueError: If no index has been built or loaded yet.
        """
        if self.index is None:
            raise ValueError("Index not built yet!")
        # Save the FAISS index.
        faiss.write_index(self.index, f"{path}.faiss")
        # Save the record metadata; ensure_ascii=False keeps CJK text readable.
        with open(f"{path}.json", 'w', encoding='utf-8') as f:
            json.dump(self.slogans, f, ensure_ascii=False, indent=2)
        print(f"💾 Database saved to {path}")

    def load(self, path="slogan_db"):
        """Load a database previously written by ``save``.

        The instance is only modified when both files load and agree in size,
        so a failed load never leaves a half-initialised database behind.

        Args:
            path: File path prefix (no extension).

        Returns:
            True on success, False if the files are missing, unreadable,
            malformed, or inconsistent with each other.
        """
        try:
            # Read into locals first; assign to self only after full success.
            index = faiss.read_index(f"{path}.faiss")
            with open(f"{path}.json", 'r', encoding='utf-8') as f:
                slogans = json.load(f)
        except (OSError, RuntimeError, ValueError):
            # OSError: missing/unreadable JSON file; RuntimeError: FAISS read
            # failure; ValueError: invalid JSON or text encoding.
            print(f"❌ Failed to load database from {path}")
            return False
        # The two files must describe the same number of records, otherwise
        # FAISS row numbers would point at the wrong (or no) metadata entry.
        if not isinstance(slogans, list) or index.ntotal != len(slogans):
            print(f"❌ Failed to load database from {path}")
            return False
        self.index = index
        self.slogans = slogans
        print(f"📂 Database loaded from {path}")
        return True
def main():
    """Build or reload the slogan database, then run a set of demo searches.

    A previously saved database is reused when it loads successfully;
    otherwise the dataset is created, indexed and saved. The top three
    matches for each demo query are then printed.
    """
    print("🚀 Creating Slogan Database...")
    database = SloganDatabase()

    # Reuse a saved database when possible; build and save one otherwise.
    loaded = database.load()
    if not loaded:
        print("📊 Creating new database...")
        records = database.create_dataset()
        database.build_index(records)
        database.save()

    # Demo queries in Chinese and English.
    demo_queries = (
        "钻石订婚戒指",
        "奢侈品手袋",
        "瑞士手表品牌",
        "珍珠首饰",
        "黄金项链",
        "时尚耳环",
        "luxury jewelry brand",
        "designer handbag",
        "crystal accessories",
        "wedding rings",
    )

    print("\n🔍 Testing searches...")
    for query in demo_queries:
        print(f"\n查询: {query}")
        print("-" * 40)
        for i, result in enumerate(database.search(query, k=3), start=1):
            print(f"{i}. {result['brand']} ({result['category']})")
            print(f" 描述: {result['description']}")
            print(f" 标语: {result['slogan']}")
            print(f" 相似度: {result['similarity_score']:.3f}")
            # Blank line between consecutive results.
            print()
# Run the demo only when executed as a script, not when imported as a module.
if __name__ == "__main__":
    main()