sparksearch-demo / SmartSearch /search_manager.py
teddyllm's picture
Upload 20 files
bd3532f verified
from typing import List
from embedding_provider import EmbeddingProvider
from database.annoydb import AnnoyDB
class SearchManager:
    def __init__(
        self,
        embedding_provider: EmbeddingProvider,
        documents: List[str],
        semantic_weight: float = 0.7,
        keyword_weight: float = 0.3
    ) -> None:
        """Set up the semantic and keyword search back-ends for a document set.

        The documents are embedded once with ``embedding_provider``, each
        embedding is stored in an ``AnnoyDB`` index next to its source
        document, and a ``KeywordSearchProvider`` is created over the same
        documents.

        Args:
            embedding_provider (EmbeddingProvider): object whose
                ``embed_documents`` method turns the documents into embeddings.
            documents (List[str]): documents to make searchable.
            semantic_weight (float, optional): weight stored for the semantic
                side of hybrid search. Defaults to 0.7.
            keyword_weight (float, optional): weight stored for the keyword
                side of hybrid search. Defaults to 0.3.
        """
        # Plain state first: nothing below depends on these being set late.
        self.embedding_provider = embedding_provider
        self.documents = documents

        # Weights for hybrid search (how they are combined is not shown in
        # this part of the file).
        self.semantic_weight = semantic_weight
        self.keyword_weight = keyword_weight

        # Semantic side: embed every document in a single call.
        # Assumes the result is 2-D with the embedding size on axis 1, since
        # ``shape[1]`` is read below -- TODO confirm against EmbeddingProvider.
        embeddings = embedding_provider.embed_documents(documents)
        self.semantic_embeddings = embeddings

        # Vector database: one entry per (embedding, document) pair, then build.
        index = AnnoyDB(embedding_dim=embeddings.shape[1])
        self.vector_db = index
        for vector, text in zip(embeddings, documents):
            index.add_item(vector, text)
        index.build()

        # Keyword side.
        # NOTE(review): KeywordSearchProvider is not imported in the visible
        # part of this file -- confirm the import exists, otherwise this line
        # raises NameError.
        self.keyword_search = KeywordSearchProvider(documents)